DATA — Public reference datasets for methodology:
- data/README.md: schema + format definitions for brand catalogs
- data/swiece-sojowe-pl/brand_catalog.json: 35 tracked brands (33 manufacturers + 2 importers) + 5 excluded marketplaces/resellers
- data/swiece-sojowe-pl/brand_catalog.md: human-readable companion
- data/swiece-sojowe-pl/market_metadata.json: GMV estimate, personas, seasonality, expected dynamics
TOOLS — 6-stage prompt curation pipeline (Python 3.12+):
- tools/prompt_curation/README.md: process documentation + cost estimates
- tools/prompt_curation/config.py: tunable parameters per stage
- tools/prompt_curation/.env.example: required API keys template
- tools/prompt_curation/requirements.txt: dependencies
- tools/prompt_curation/1_persona_generator.py: Claude generates 7 buyer personas
- tools/prompt_curation/2_prompt_brainstormer.py: 30 prompts per persona, written in that persona's voice
- tools/prompt_curation/3_reality_checker.py: Google Trends + Reddit cross-check
- tools/prompt_curation/4_validation_agents.py: 3 critic agents async (real_buyer/methodology/exploit_hunter)
- tools/prompt_curation/5_pilot_test_runner.py: sample × 3 LLM models pre-flight
- tools/prompt_curation/6_human_review_export.py: CSV export for founder approval
- tools/prompt_curation/7_finalize.py: post-approval → closed prompts/{cat}/v{N}.json
- tools/prompt_curation/pipeline.py: orchestrator (stages 1–6, then human review, then 7)
GITIGNORE — Fixed .env.* exclusion to allow .env.example.
This commit completes Faza 1. Stage outputs (data/{cat}/personas.json,
raw_prompts.json, validated_prompts.json, critic_review.json, pilot_test_results.json,
for_human_review.csv) are runtime artifacts — public when committed, derived from
public methodology + public brand catalog. Final approved prompt strings in
prompts/{cat}/v{N}.json remain CLOSED (gitignored, anti-Goodhart's Law).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
159 lines
5.6 KiB
Python
159 lines
5.6 KiB
Python
"""Stage 1 — Persona Generator.
|
|
|
|
Generate buyer personas for a category using Claude Sonnet.
|
|
Each persona includes demographics, pain points, decision factors, vocabulary.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from anthropic import Anthropic
|
|
|
|
from config import CONFIG
|
|
|
|
PERSONA_PROMPT = """You are a market researcher specializing in Polish e-commerce buyer personas.
|
|
|
|
Generate {num_personas} distinct buyer personas for the category: **{category_display_name}**.
|
|
|
|
Each persona should represent a realistic, distinct segment of buyers in this category. Avoid generic "millennial professional" — be specific.
|
|
|
|
For each persona, provide:
|
|
1. **Name/label** — short descriptor (e.g., "30+ kobieta kupująca prezent dla mamy")
|
|
2. **Demographics** — age range, gender, location type (city/suburb/rural), income bracket
|
|
3. **Pain points** — what they're trying to solve when buying in this category
|
|
4. **Decision factors** — ranked list of what matters most (price, ingredients, brand, reviews, etc.)
|
|
5. **Vocabulary** — how they actually talk: formal/colloquial, technical/lay, Polish-specific phrases they use
|
|
6. **Search behavior** — do they research deeply or buy impulsively? Which platforms do they trust?
|
|
|
|
Categories context:
|
|
- Polish e-commerce market
|
|
- Buyers using AI assistants (ChatGPT, Perplexity, Gemini, Claude) increasingly to discover and compare brands
|
|
- This is for {category_display_name} — a {market_size} market with brands like {sample_brands}
|
|
|
|
Output as valid JSON in this exact schema:
|
|
|
|
```json
|
|
{{
|
|
"category": "{category_slug}",
|
|
"personas": [
|
|
{{
|
|
"id": "persona_1",
|
|
"label": "30+ kobieta kupująca prezent dla mamy",
|
|
"demographics": {{
|
|
"age_range": "30-45",
|
|
"gender": "female",
|
|
"location_type": "city",
|
|
"income_bracket": "middle to upper-middle"
|
|
}},
|
|
"pain_points": [
|
|
"...",
|
|
"..."
|
|
],
|
|
"decision_factors_ranked": [
|
|
"...",
|
|
"..."
|
|
],
|
|
"vocabulary": {{
|
|
"register": "informal but respectful",
|
|
"polish_phrases": ["...", "..."],
|
|
"tech_savvy": "medium"
|
|
}},
|
|
"search_behavior": "researches 2-3 options before deciding, trusts Reddit and friend recommendations"
|
|
}}
|
|
]
|
|
}}
|
|
```
|
|
|
|
Only output the JSON. No prose before or after."""
|
|
|
|
|
|
def load_category_context(category_slug: str, data_dir: Path) -> dict:
    """Read a category's market metadata and brand catalog from disk.

    Both files are looked up under ``data_dir / category_slug``:
    ``market_metadata.json`` and ``brand_catalog.json``.

    Args:
        category_slug: Name of the category sub-directory inside ``data_dir``.
        data_dir: Directory that holds one sub-directory per category.

    Returns:
        A dict with four keys: ``"metadata"`` and ``"catalog"`` (the parsed
        JSON documents), ``"sample_brands"`` (the ``name`` of the first five
        entries of ``catalog["brands"]``, comma-separated) and
        ``"market_size"`` (``metadata["market_size"]["tier_classification"]``).

    Raises:
        FileNotFoundError: If either JSON file does not exist.
    """
    category_dir = data_dir / category_slug
    metadata_path = category_dir / "market_metadata.json"
    catalog_path = category_dir / "brand_catalog.json"

    # Both inputs are verified before anything is parsed, metadata first.
    if not metadata_path.exists():
        raise FileNotFoundError(f"Market metadata not found: {metadata_path}")
    if not catalog_path.exists():
        raise FileNotFoundError(f"Brand catalog not found: {catalog_path}")

    metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    catalog = json.loads(catalog_path.read_text(encoding="utf-8"))

    # Only the first five catalog entries are quoted as example brands.
    leading_names = [brand["name"] for brand in catalog["brands"][:5]]
    tier = metadata["market_size"]["tier_classification"]

    return {
        "metadata": metadata,
        "catalog": catalog,
        "sample_brands": ", ".join(leading_names),
        "market_size": tier,
    }
|
|
|
|
|
|
def _parse_model_json(raw: str) -> dict:
    """Decode the JSON document contained in a model reply.

    Accepts a reply wrapped in a Markdown code fence, with or without a
    ``json`` language tag (any letter case). If the cleaned text still does
    not parse, falls back to the span between the first ``{`` and the last
    ``}`` so that stray prose around the object does not break parsing.

    Raises:
        json.JSONDecodeError: If no parseable JSON can be recovered.
    """
    text = raw.strip()
    if text.startswith("```"):
        text = text[3:]
        # The opening fence may carry a language tag; accept any letter case.
        if text[:4].lower() == "json":
            text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        first, last = text.find("{"), text.rfind("}")
        if first == -1 or last <= first:
            raise
        candidate = text[first:last + 1]
        if candidate == text:
            # Nothing to trim away; re-parsing would fail the same way.
            raise
        return json.loads(candidate)


def generate_personas(category_slug: str, category_display_name: str, num_personas: int | None = None) -> dict:
    """Generate personas via Claude API.

    Builds the request from ``PERSONA_PROMPT`` and the category context loaded
    from the ``data`` directory three levels above this file, sends it as a
    single user message, and parses the JSON reply.

    Args:
        category_slug: Category directory name under ``data``; also inserted
            into the prompt.
        category_display_name: Human-readable category name inserted into the
            prompt.
        num_personas: Number of personas to request. ``None`` means
            ``CONFIG.num_personas``.

    Returns:
        The parsed JSON reply.

    Raises:
        FileNotFoundError: If the category's metadata or catalog file is missing.
        KeyError: If ``ANTHROPIC_API_KEY`` is not set in the environment.
        ValueError: If the reply contains no text content block.
        json.JSONDecodeError: If the reply is not valid JSON; the message says
            so explicitly when the reply was cut off at the token limit.
    """
    if num_personas is None:
        num_personas = CONFIG.num_personas

    data_dir = Path(__file__).parent.parent.parent / "data"
    context = load_category_context(category_slug, data_dir)

    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    prompt = PERSONA_PROMPT.format(
        num_personas=num_personas,
        category_slug=category_slug,
        category_display_name=category_display_name,
        market_size=context["market_size"],
        sample_brands=context["sample_brands"],
    )

    max_tokens = 4000
    response = client.messages.create(
        model=CONFIG.persona_model,
        max_tokens=max_tokens,
        messages=[{"role": "user", "content": prompt}],
    )

    # Take the first content block that actually carries text instead of
    # assuming content[0] exists and is a text block.
    reply_text = None
    for block in response.content:
        block_text = getattr(block, "text", None)
        if isinstance(block_text, str):
            reply_text = block_text
            break
    if reply_text is None:
        raise ValueError("Claude response contained no text content block")

    try:
        return _parse_model_json(reply_text)
    except json.JSONDecodeError as err:
        # A reply cut off at the token limit is invalid JSON by construction;
        # name the real cause while keeping the exception type unchanged.
        if getattr(response, "stop_reason", None) == "max_tokens":
            raise json.JSONDecodeError(
                f"Claude response was truncated at max_tokens={max_tokens}: {err.msg}",
                err.doc,
                err.pos,
            ) from err
        raise
|
|
|
|
|
|
def save_personas(category_slug: str, personas: dict) -> Path:
    """Write ``personas`` to ``data/<category_slug>/personas.json``.

    The ``data`` directory is resolved three levels above this file. The
    category directory is created if it does not exist yet. The file is
    written as UTF-8 JSON with a 2-space indent and non-ASCII characters
    left unescaped.

    Args:
        category_slug: Category directory name under ``data``.
        personas: JSON-serialisable personas document.

    Returns:
        Path of the written ``personas.json`` file.
    """
    base_dir = Path(__file__).parent.parent.parent
    target_dir = base_dir / "data" / category_slug
    target_dir.mkdir(parents=True, exist_ok=True)

    destination = target_dir / "personas.json"
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(personas, handle, ensure_ascii=False, indent=2)
    return destination
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: generate personas for a category and save them.

    Parses ``--category``, ``--display-name`` and the optional
    ``--num-personas``, calls ``generate_personas`` and ``save_personas``,
    and prints progress lines to stdout. A ``--num-personas`` value below 1
    is rejected with a usage error before any API call is made.
    """
    parser = argparse.ArgumentParser(description="Generate buyer personas for a Citee category.")
    parser.add_argument("--category", required=True, help="Category slug (e.g., 'swiece-sojowe-pl')")
    parser.add_argument("--display-name", required=True, help="Human-readable category name (e.g., 'Świece sojowe PL')")
    parser.add_argument("--num-personas", type=int, default=None, help=f"Number of personas to generate (default: {CONFIG.num_personas})")
    args = parser.parse_args()

    if args.num_personas is not None and args.num_personas < 1:
        parser.error("--num-personas must be a positive integer")

    # Resolve the count once so the progress line and the API request agree;
    # a truthiness test would silently substitute the default for 0.
    num_personas = CONFIG.num_personas if args.num_personas is None else args.num_personas

    print(f"[Stage 1] Generating {num_personas} personas for {args.category}...")
    personas = generate_personas(args.category, args.display_name, num_personas)
    output_path = save_personas(args.category, personas)
    print(f"[Stage 1] ✅ Saved {len(personas['personas'])} personas to {output_path}")
|
|
|
|
|
|
# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|