"""Stage 4 — Multi-agent Validation. Three Claude critic agents review prompts in parallel: - Agent A: Real buyer critique (does this sound like real persona phrasing?) - Agent B: Methodology critic (statistical balance, distribution, vocabulary) - Agent C: Vendor exploit hunter (which prompts are too easy to game by content marketing?) Prompts flagged by N+ agents (default: 2) are removed. """ from __future__ import annotations import argparse import asyncio import json import os from pathlib import Path from anthropic import AsyncAnthropic from config import CONFIG AGENT_A_PROMPT = """You are reviewing a list of prompts that buyer personas would supposedly type into ChatGPT/Perplexity/Gemini when researching purchases in **{category}**. Your job: identify prompts that DON'T sound natural for any realistic Polish e-commerce buyer. Flag prompts that: 1. Are too formal/academic (no buyer phrases queries like a research paper) 2. Are too long (real users don't type 30-word queries) 3. Are too short / generic (single words or 2-word phrases) 4. Use vocabulary no real Polish buyer would use 5. Are buyer-impossible (e.g., asking about specs only B2B buyer would care about, in a B2C context) Here are the {prompt_count} prompts to review: {prompts_list} Output JSON array of flagged prompt IDs (use the index as ID, 0-indexed): ```json {{ "flagged_indices": [3, 7, 12], "reasons": {{ "3": "Too formal — no real buyer types like this", "7": "Single word, no buying intent", "12": "B2B language in B2C context" }} }} ``` Only output JSON. No prose.""" AGENT_B_PROMPT = """You are a methodology critic for a Polish e-commerce AI visibility ranking project. Review this prompt list for **statistical and structural issues**: Target distribution per the methodology: - buying: 30% (weight 2.0) - comparison: 25% (weight 1.5) - specific_need: 20% (weight 1.5) - informational: 15% (weight 0.3) - brand_direct: 10% (weight 0.3) Total prompts: {prompt_count} Flag issues: 1. Type distribution off by >10% from target 2. Vocabulary too repetitive (same phrases recurring) 3. Subcategory bias (e.g., 80% prompts about prezenty, 20% about everything else) 4. Length distribution unreasonable (all prompts are very long or very short) 5. Missing realistic buyer scenarios (e.g., no prompts about specific occasions, sizes, attributes) Prompts list: {prompts_list} Output: ```json {{ "flagged_indices": [...], "reasons": {{...}}, "structural_issues": [ "Type 'comparison' is over-represented at 35% (target 25%)", "20+ prompts mention 'prezent dla mamy' — too repetitive" ] }} ``` Only JSON output.""" AGENT_C_PROMPT = """You are a vendor exploit hunter for a Polish e-commerce AI visibility ranking. Your job: identify prompts that are TOO EASY for a vendor to game by content marketing fluff. A prompt is "exploitable" if: 1. The answer can be dominated by writing one good blog post 2. The answer comes primarily from Wikipedia (vendors can edit Wikipedia) 3. The answer is brand-agnostic (any vendor can position to win it via SEO content) 4. The prompt would be answered by listing Wikipedia / blog content rather than specific brand recommendations We WANT prompts where: - AI must recommend specific brands (with real reviews, real authority, multi-source citation) - Prompt requires real product positioning, not just content production - Multiple sources (reviews, Reddit, brand sites) need to align for ranking Flag prompts that are too gameable: {prompts_list} Output: ```json {{ "flagged_indices": [...], "reasons": {{ "5": "Generic 'co to świeca sojowa' — easily gamed by Wikipedia + blog post", "12": "Brand-agnostic 'jak działa świeca sojowa' — content marketing fluff target" }} }} ``` Only JSON output.""" async def run_agent(client: AsyncAnthropic, prompt: str) -> dict: """Single agent call.""" response = await client.messages.create( model=CONFIG.critic_models["real_buyer_critique"], max_tokens=4000, messages=[{"role": "user", "content": prompt}], ) text = response.content[0].text.strip() if text.startswith("```json"): text = text[7:] if text.endswith("```"): text = text[:-3] return json.loads(text.strip()) async def run_three_critics(prompts: list[dict], category_display_name: str) -> dict: client = AsyncAnthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) # Format prompts for review prompts_text = "\n".join( f"{i}. [{p['type']}] {p['prompt']}" for i, p in enumerate(prompts) ) agent_a = AGENT_A_PROMPT.format( category=category_display_name, prompt_count=len(prompts), prompts_list=prompts_text, ) agent_b = AGENT_B_PROMPT.format( prompt_count=len(prompts), prompts_list=prompts_text, ) agent_c = AGENT_C_PROMPT.format( prompts_list=prompts_text, ) # Run 3 agents in parallel print("[Stage 4] Running 3 critic agents in parallel...") results = await asyncio.gather( run_agent(client, agent_a), run_agent(client, agent_b), run_agent(client, agent_c), ) return { "agent_a_real_buyer": results[0], "agent_b_methodology": results[1], "agent_c_exploit_hunter": results[2], } def aggregate_flags(critic_results: dict, total_prompts: int) -> dict: """Count how many agents flagged each prompt index.""" flag_counts: dict[int, list[str]] = {} for agent_name, result in critic_results.items(): for idx in result.get("flagged_indices", []): if idx not in flag_counts: flag_counts[idx] = [] reason = result.get("reasons", {}).get(str(idx), "no reason given") flag_counts[idx].append(f"{agent_name}: {reason}") flagged_for_removal = [ idx for idx, reasons in flag_counts.items() if len(reasons) >= CONFIG.flagged_by_n_critics_to_remove ] return { "flag_counts_by_prompt": flag_counts, "flagged_for_removal": sorted(flagged_for_removal), "removal_threshold_critics": CONFIG.flagged_by_n_critics_to_remove, "total_prompts": total_prompts, "total_removed": len(flagged_for_removal), "total_kept": total_prompts - len(flagged_for_removal), } def main(): parser = argparse.ArgumentParser(description="Multi-agent validation of prompts.") parser.add_argument("--category", required=True) parser.add_argument("--display-name", required=True) args = parser.parse_args() data_dir = Path(__file__).parent.parent.parent / "data" / args.category validated_file = data_dir / "validated_prompts.json" if not validated_file.exists(): raise FileNotFoundError(f"Run 3_reality_checker.py first. Missing: {validated_file}") with open(validated_file, "r", encoding="utf-8") as f: validated_data = json.load(f) # Filter out reality-check failures first candidates = [p for p in validated_data["validated_prompts"] if p["reality_signal"] != "fail"] print(f"[Stage 4] Reviewing {len(candidates)} prompts (post-reality-check)...") critic_results = asyncio.run(run_three_critics(candidates, args.display_name)) aggregation = aggregate_flags(critic_results, len(candidates)) output = { "category": args.category, "input_count": len(candidates), "critic_results": critic_results, "aggregation": aggregation, "kept_prompts": [ p for i, p in enumerate(candidates) if i not in aggregation["flagged_for_removal"] ], } output_file = data_dir / "critic_review.json" with open(output_file, "w", encoding="utf-8") as f: json.dump(output, f, ensure_ascii=False, indent=2) print(f"[Stage 4] ✅ Saved {output_file}") print(f"[Stage 4] Removed: {aggregation['total_removed']}, Kept: {aggregation['total_kept']}") if __name__ == "__main__": main()