"""Stage 7 — Finalize. Read the human-decided CSV (`for_human_review_decided.csv`) and produce the final closed prompt pool: `prompts/{category}/v{N}.json` (gitignored). """ from __future__ import annotations import argparse import csv import json from datetime import datetime, timezone from pathlib import Path from config import CONFIG, get_target_counts def load_decided_csv(category_slug: str) -> list[dict]: decided_file = ( Path(__file__).parent.parent.parent / "data" / category_slug / "for_human_review_decided.csv" ) if not decided_file.exists(): raise FileNotFoundError( f"Decided CSV not found: {decided_file}. " f"Did you fill out 'for_human_review.csv' and rename to 'for_human_review_decided.csv'?" ) rows = [] with open(decided_file, "r", encoding="utf-8-sig", newline="") as f: reader = csv.DictReader(f, delimiter=";") for row in reader: rows.append(row) return rows def determine_next_version(category_slug: str) -> str: """Find next version number (v1, v2, ...) for closed prompts.""" prompts_dir = Path(__file__).parent.parent.parent / "prompts" / category_slug if not prompts_dir.exists(): prompts_dir.mkdir(parents=True, exist_ok=True) return "v1" existing_versions = [] for f in prompts_dir.glob("v*.json"): try: num = int(f.stem.replace("v", "")) existing_versions.append(num) except ValueError: continue next_num = (max(existing_versions) + 1) if existing_versions else 1 return f"v{next_num}" def finalize(category_slug: str) -> Path: rows = load_decided_csv(category_slug) approved_prompts = [] rejected_count = 0 edited_count = 0 for row in rows: decision = (row.get("decision") or "").strip().upper() if decision == "REJECT": rejected_count += 1 continue if decision == "EDIT": edited_text = (row.get("edited_prompt") or "").strip() if not edited_text: print(f"[Stage 7] ⚠ Row {row.get('row_id')} marked EDIT but no edited_prompt — skipping") continue approved_prompts.append({ "prompt": edited_text, "type": row["type"], "persona_id": row.get("persona_id", ""), "edited_from": row.get("prompt", ""), }) edited_count += 1 elif decision == "APPROVE": approved_prompts.append({ "prompt": row["prompt"], "type": row["type"], "persona_id": row.get("persona_id", ""), }) # Empty decision = treat as not yet decided, skip # Distribution check by_type: dict[str, int] = {} for p in approved_prompts: by_type[p["type"]] = by_type.get(p["type"], 0) + 1 target_counts = get_target_counts() distribution_warnings = [] for ptype, target in target_counts.items(): actual = by_type.get(ptype, 0) if actual < target * 0.7: distribution_warnings.append(f"{ptype}: only {actual} approved (target {target})") elif actual > target * 1.3: distribution_warnings.append(f"{ptype}: too many — {actual} approved (target {target})") version = determine_next_version(category_slug) output_dir = Path(__file__).parent.parent.parent / "prompts" / category_slug output_dir.mkdir(parents=True, exist_ok=True) output_file = output_dir / f"{version}.json" final_pool = { "category": category_slug, "version": version, "finalized_at": datetime.now(timezone.utc).isoformat(), "total_prompts": len(approved_prompts), "distribution": by_type, "target_distribution": target_counts, "rejected_count": rejected_count, "edited_count": edited_count, "distribution_warnings": distribution_warnings, "prompts": approved_prompts, } with open(output_file, "w", encoding="utf-8") as f: json.dump(final_pool, f, ensure_ascii=False, indent=2) return output_file, final_pool def main(): parser = argparse.ArgumentParser(description="Finalize prompt pool from human-decided CSV.") parser.add_argument("--category", required=True) args = parser.parse_args() print(f"[Stage 7] Finalizing prompt pool for {args.category}...") output_file, summary = finalize(args.category) print(f"[Stage 7] ✅ Saved {output_file}") print(f"[Stage 7] Total approved: {summary['total_prompts']}") print(f"[Stage 7] Rejected: {summary['rejected_count']}, Edited: {summary['edited_count']}") print(f"[Stage 7] Distribution: {summary['distribution']}") if summary["distribution_warnings"]: print() print("⚠ Distribution warnings:") for w in summary["distribution_warnings"]: print(f" - {w}") print() print("Consider re-running Stage 6 to add/remove prompts to fix distribution.") else: print() print(f"✅ Distribution looks good! Final pool ready at:") print(f" {output_file}") print() print("This file is in `prompts/` directory which is gitignored — exact strings remain CLOSED.") print("Public reference (illustrative examples) lives in `prompts/example-{category}.md`.") if __name__ == "__main__": main()