"""Stage 2 — Prompt Brainstormer. For each persona generated in Stage 1, generate 30 prompts in the voice of that persona. Total output: ~200-300 raw prompts per category, distributed across 5 prompt types. """ from __future__ import annotations import argparse import json import os from pathlib import Path from anthropic import Anthropic from config import CONFIG, get_target_counts BRAINSTORM_PROMPT = """You are a Polish e-commerce buyer with the following persona: **{persona_label}** Demographics: {demographics} Pain points: {pain_points} Decision factors: {decision_factors} Vocabulary style: {vocabulary} Your task: Generate {prompts_per_persona} realistic queries you would type into ChatGPT, Perplexity, or Gemini when researching purchases in the **{category_display_name}** category. CRITICAL distribution requirement — your {prompts_per_persona} prompts must be distributed across these 5 types: | Type | Share | Example pattern | |---|---|---| | **buying** (active purchase intent) | {buying_share}% | "gdzie kupić [product] premium na prezent" | | **comparison** (decision-stage) | {comparison_share}% | "[Brand A] vs [Brand B] który lepszy" | | **specific_need** (specific use/attribute) | {specific_share}% | "[product] o [attribute] do [use case]" | | **informational** (research) | {info_share}% | "co to [product] / jak działa [product]" | | **brand_direct** (direct brand query) | {brand_share}% | "[Brand X] opinie / recenzje" | Rules: 1. Write in **Polish** as your persona would actually phrase queries — colloquial, possibly with typos, possibly informal. 2. Be **specific** — avoid "best soy candles" generic. Add context: occasion, recipient, attribute, comparison. 3. Distribute across types per the percentages — don't put all 30 in "buying." 4. Each prompt must be 5-15 words typical. Avoid 1-word ("świeczki") or 30-word essays. 5. Use real brand names from this list when relevant: {sample_brands} Output format — JSON array of objects, each with: - `prompt`: the query text - `type`: one of `buying`, `comparison`, `specific_need`, `informational`, `brand_direct` - `persona_id`: "{persona_id}" ```json [ {{"prompt": "gdzie kupić premium świecę sojową na prezent dla mamy", "type": "buying", "persona_id": "{persona_id}"}}, ... ] ``` Only output the JSON array. No prose.""" def load_personas(category_slug: str) -> dict: data_dir = Path(__file__).parent.parent.parent / "data" / category_slug personas_file = data_dir / "personas.json" if not personas_file.exists(): raise FileNotFoundError( f"Personas not found: {personas_file}. Run 1_persona_generator.py first." ) with open(personas_file, "r", encoding="utf-8") as f: return json.load(f) def load_brand_catalog(category_slug: str) -> dict: data_dir = Path(__file__).parent.parent.parent / "data" / category_slug catalog_file = data_dir / "brand_catalog.json" with open(catalog_file, "r", encoding="utf-8") as f: return json.load(f) def brainstorm_for_persona( client: Anthropic, persona: dict, category_display_name: str, sample_brands: str, prompts_per_persona: int, ) -> list[dict]: """Generate prompts for one persona.""" distribution = CONFIG.type_distribution prompt = BRAINSTORM_PROMPT.format( persona_label=persona["label"], persona_id=persona["id"], demographics=json.dumps(persona["demographics"], ensure_ascii=False), pain_points="; ".join(persona["pain_points"]), decision_factors="; ".join(persona["decision_factors_ranked"]), vocabulary=json.dumps(persona["vocabulary"], ensure_ascii=False), category_display_name=category_display_name, prompts_per_persona=prompts_per_persona, sample_brands=sample_brands, buying_share=int(distribution["buying"] * 100), comparison_share=int(distribution["comparison"] * 100), specific_share=int(distribution["specific_need"] * 100), info_share=int(distribution["informational"] * 100), brand_share=int(distribution["brand_direct"] * 100), ) response = client.messages.create( model=CONFIG.brainstormer_model, max_tokens=8000, messages=[{"role": "user", "content": prompt}], ) text = response.content[0].text.strip() if text.startswith("```json"): text = text[7:] if text.endswith("```"): text = text[:-3] text = text.strip() return json.loads(text) def brainstorm_all(category_slug: str, category_display_name: str) -> dict: personas_data = load_personas(category_slug) catalog = load_brand_catalog(category_slug) sample_brands = ", ".join(b["name"] for b in catalog["brands"][:8]) client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) all_prompts = [] for persona in personas_data["personas"]: print(f"[Stage 2] Brainstorming for {persona['label']}...") prompts = brainstorm_for_persona( client=client, persona=persona, category_display_name=category_display_name, sample_brands=sample_brands, prompts_per_persona=CONFIG.prompts_per_persona, ) all_prompts.extend(prompts) return { "category": category_slug, "total_raw_prompts": len(all_prompts), "personas_processed": len(personas_data["personas"]), "prompts_per_persona": CONFIG.prompts_per_persona, "type_distribution_target": CONFIG.type_distribution, "raw_prompts": all_prompts, } def save_raw_prompts(category_slug: str, data: dict) -> Path: output_dir = Path(__file__).parent.parent.parent / "data" / category_slug output_dir.mkdir(parents=True, exist_ok=True) output_file = output_dir / "raw_prompts.json" with open(output_file, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) return output_file def main(): parser = argparse.ArgumentParser(description="Brainstorm raw prompts for category.") parser.add_argument("--category", required=True) parser.add_argument("--display-name", required=True) args = parser.parse_args() print(f"[Stage 2] Brainstorming prompts for {args.category}...") data = brainstorm_all(args.category, args.display_name) output_path = save_raw_prompts(args.category, data) print(f"[Stage 2] ✅ Saved {data['total_raw_prompts']} raw prompts to {output_path}") print(f"[Stage 2] Target distribution: {get_target_counts()}") if __name__ == "__main__": main()