"""Stage 6 — Human Review Export. Export remaining candidate prompts to a CSV that the founder + category expert can review in a spreadsheet. Each row has columns for accept/reject/edit decisions. """ from __future__ import annotations import argparse import csv import json from pathlib import Path from config import CONFIG, get_target_counts def load_kept_prompts(category_slug: str) -> list[dict]: data_dir = Path(__file__).parent.parent.parent / "data" / category_slug # Take output from Stage 4 (filtered by critics) — Stage 5 was just a sample test critic_file = data_dir / "critic_review.json" if not critic_file.exists(): raise FileNotFoundError(f"Run 4_validation_agents.py first. Missing: {critic_file}") with open(critic_file, "r", encoding="utf-8") as f: critic_data = json.load(f) return critic_data["kept_prompts"] def export_to_csv(category_slug: str, prompts: list[dict]) -> Path: output_file = ( Path(__file__).parent.parent.parent / "data" / category_slug / "for_human_review.csv" ) target_counts = get_target_counts() # Group prompts by type for easier review by_type: dict[str, list[dict]] = {} for p in prompts: by_type.setdefault(p["type"], []).append(p) with open(output_file, "w", encoding="utf-8-sig", newline="") as f: writer = csv.writer(f, delimiter=";", quoting=csv.QUOTE_ALL) writer.writerow([ "row_id", "type", "type_target_count", "prompt", "persona_id", "decision", # APPROVE / REJECT / EDIT "edited_prompt", # If decision == EDIT, write new version here "notes", ]) row_id = 0 for ptype, type_prompts in sorted(by_type.items()): target = target_counts.get(ptype, 0) for p in type_prompts: row_id += 1 writer.writerow([ row_id, ptype, target, p["prompt"], p.get("persona_id", ""), "", # decision — fill in "", # edited prompt — fill if needed "", # notes ]) return output_file def export_summary_md(category_slug: str, prompts: list[dict]) -> Path: """Write human-readable summary.""" output_file = ( Path(__file__).parent.parent.parent / "data" / category_slug / "for_human_review_summary.md" ) target_counts = get_target_counts() by_type: dict[str, int] = {} for p in prompts: by_type[p["type"]] = by_type.get(p["type"], 0) + 1 with open(output_file, "w", encoding="utf-8") as f: f.write(f"# Human Review — {category_slug}\n\n") f.write(f"**Total candidates after Stages 1-5: {len(prompts)}**\n\n") f.write(f"**Target final pool: {CONFIG.final_pool_size}**\n\n") f.write("## Distribution check\n\n") f.write("| Type | Candidates | Target | Status |\n") f.write("|---|---|---|---|\n") for ptype in ["buying", "comparison", "specific_need", "informational", "brand_direct"]: count = by_type.get(ptype, 0) target = target_counts.get(ptype, 0) status = "✅" if count >= target * 1.2 else ("⚠️" if count >= target else "❌ too few") f.write(f"| {ptype} | {count} | {target} | {status} |\n") f.write("\n## Review process\n\n") f.write("1. Open `for_human_review.csv` in spreadsheet\n") f.write("2. For each row, fill `decision` column with: `APPROVE`, `REJECT`, or `EDIT`\n") f.write("3. If `EDIT`, write new version in `edited_prompt` column\n") f.write(f"4. Aim to APPROVE ~{CONFIG.final_pool_size} prompts total, balanced per target distribution\n") f.write("5. Save as `for_human_review_decided.csv`\n") f.write("6. Run `python 7_finalize.py --category {category_slug}` to produce final closed pool\n\n") f.write("## Tips\n\n") f.write("- If a type has too few candidates, you may need to edit some from over-represented types to fit\n") f.write("- Watch for repetitive vocabulary — if 5 prompts say 'gdzie kupić premium prezent' similar, vary or reject most\n") f.write("- For brand_direct prompts, ensure each major brand from `brand_catalog.json` has at least 1 prompt directed at it\n") return output_file def main(): parser = argparse.ArgumentParser(description="Export prompts for human review.") parser.add_argument("--category", required=True) args = parser.parse_args() print(f"[Stage 6] Exporting prompts for human review: {args.category}...") prompts = load_kept_prompts(args.category) csv_path = export_to_csv(args.category, prompts) summary_path = export_summary_md(args.category, prompts) print(f"[Stage 6] ✅ Exported {len(prompts)} prompts") print(f"[Stage 6] CSV: {csv_path}") print(f"[Stage 6] Summary: {summary_path}") print() print("Next: open the CSV, fill decision column (APPROVE/REJECT/EDIT), save as 'for_human_review_decided.csv',") print(f"then run: python 7_finalize.py --category {args.category}") if __name__ == "__main__": main()