# citee-methodology/tools/prompt_curation/6_human_review_export.py

"""Stage 6 — Human Review Export.

Export remaining candidate prompts to a CSV that the founder + category expert
can review in a spreadsheet. Each row has columns for accept/reject/edit decisions.
"""
from __future__ import annotations

import argparse
import csv
import json
from pathlib import Path

from config import CONFIG, get_target_counts


def load_kept_prompts(category_slug: str) -> list[dict]:
    """Return the prompts kept by the Stage 4 critic review.

    Reads ``data/<category_slug>/critic_review.json`` (resolved three
    directory levels above this file) and returns its ``"kept_prompts"``
    entry. Stage 5 output is deliberately not used here: it was only a
    sample test.

    Raises:
        FileNotFoundError: If the critic review file does not exist yet.
        KeyError: If the file has no ``"kept_prompts"`` key.
    """
    project_root = Path(__file__).parent.parent.parent
    critic_file = project_root / "data" / category_slug / "critic_review.json"

    # Guard clause: Stage 4 must have produced its output before this stage.
    if not critic_file.exists():
        raise FileNotFoundError(f"Run 4_validation_agents.py first. Missing: {critic_file}")

    review = json.loads(critic_file.read_text(encoding="utf-8"))
    return review["kept_prompts"]


def export_to_csv(category_slug: str, prompts: list[dict]) -> Path:
    """Write the candidate prompts to ``for_human_review.csv`` and return its path.

    Rows are grouped by prompt type (types in sorted order) and numbered
    consecutively from 1. The three trailing columns (``decision``,
    ``edited_prompt``, ``notes``) are written empty for the reviewer to fill.

    The file is written as ``utf-8-sig`` with ``;`` as delimiter and every
    field quoted.

    Args:
        category_slug: Name of the category directory under ``data/``.
        prompts: Prompt dicts; each needs ``"type"`` and ``"prompt"`` keys,
            ``"persona_id"`` is optional.
    """
    data_root = Path(__file__).parent.parent.parent / "data"
    output_file = data_root / category_slug / "for_human_review.csv"

    target_counts = get_target_counts()

    # Bucket the prompts per type so the reviewer sees each type as one run.
    grouped: dict[str, list[dict]] = {}
    for entry in prompts:
        kind = entry["type"]
        if kind not in grouped:
            grouped[kind] = []
        grouped[kind].append(entry)

    header = [
        "row_id",
        "type",
        "type_target_count",
        "prompt",
        "persona_id",
        "decision",  # APPROVE / REJECT / EDIT
        "edited_prompt",  # If decision == EDIT, write new version here
        "notes",
    ]

    with open(output_file, "w", encoding="utf-8-sig", newline="") as handle:
        sheet = csv.writer(handle, delimiter=";", quoting=csv.QUOTE_ALL)
        sheet.writerow(header)

        # Flatten the groups, in sorted type order, into (type, prompt) pairs.
        ordered = (
            (kind, entry)
            for kind, entries in sorted(grouped.items())
            for entry in entries
        )
        for number, (kind, entry) in enumerate(ordered, start=1):
            sheet.writerow([
                number,
                kind,
                target_counts.get(kind, 0),
                entry["prompt"],
                entry.get("persona_id", ""),
                "",  # decision — left for the reviewer
                "",  # edited prompt — left for the reviewer
                "",  # notes
            ])

    return output_file


def export_summary_md(category_slug: str, prompts: list[dict]) -> Path:
    """Write a human-readable Markdown summary next to the review CSV.

    The summary shows the number of candidates per prompt type against the
    targets returned by ``get_target_counts()``, followed by the review
    steps and a few tips.

    Args:
        category_slug: Name of the category directory under ``data/``; also
            interpolated into the title and the Stage 7 command line.
        prompts: Candidate prompt dicts; each must have a ``"type"`` key.

    Returns:
        Path of the written ``for_human_review_summary.md`` file.

    Raises:
        KeyError: If a prompt dict has no ``"type"`` key.
    """
    output_file = (
        Path(__file__).parent.parent.parent / "data" / category_slug / "for_human_review_summary.md"
    )

    target_counts = get_target_counts()
    by_type: dict[str, int] = {}
    for p in prompts:
        by_type[p["type"]] = by_type.get(p["type"], 0) + 1

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"# Human Review — {category_slug}\n\n")
        f.write(f"**Total candidates after Stages 1-5: {len(prompts)}**\n\n")
        f.write(f"**Target final pool: {CONFIG.final_pool_size}**\n\n")
        f.write("## Distribution check\n\n")
        f.write("| Type | Candidates | Target | Status |\n")
        f.write("|---|---|---|---|\n")
        # Only these five types are listed; any other type present in
        # `prompts` is counted in the total above but gets no table row.
        for ptype in ["buying", "comparison", "specific_need", "informational", "brand_direct"]:
            count = by_type.get(ptype, 0)
            target = target_counts.get(ptype, 0)
            # OK with a 20% surplus over target, warning when the target is
            # just met, failure when there are fewer candidates than target.
            status = "✅" if count >= target * 1.2 else ("⚠️" if count >= target else "❌ too few")
            f.write(f"| {ptype} | {count} | {target} | {status} |\n")

        f.write("\n## Review process\n\n")
        f.write("1. Open `for_human_review.csv` in spreadsheet\n")
        f.write("2. For each row, fill `decision` column with: `APPROVE`, `REJECT`, or `EDIT`\n")
        f.write("3. If `EDIT`, write new version in `edited_prompt` column\n")
        f.write(f"4. Aim to APPROVE ~{CONFIG.final_pool_size} prompts total, balanced per target distribution\n")
        f.write("5. Save as `for_human_review_decided.csv`\n")
        # Must be an f-string: without the prefix the summary showed the
        # literal text "{category_slug}" instead of the category name.
        f.write(f"6. Run `python 7_finalize.py --category {category_slug}` to produce final closed pool\n\n")

        f.write("## Tips\n\n")
        f.write("- If a type has too few candidates, you may need to edit some from over-represented types to fit\n")
        f.write("- Watch for repetitive vocabulary — if 5 prompts say 'gdzie kupić premium prezent' similar, vary or reject most\n")
        f.write("- For brand_direct prompts, ensure each major brand from `brand_catalog.json` has at least 1 prompt directed at it\n")

    return output_file


def main():
    """Command-line entry point: export one category's prompts for human review.

    Takes a required ``--category`` argument, loads the kept prompts, writes
    the review CSV and the Markdown summary, and prints where the files are
    and what to do next.
    """
    cli = argparse.ArgumentParser(description="Export prompts for human review.")
    cli.add_argument("--category", required=True)
    category = cli.parse_args().category

    print(f"[Stage 6] Exporting prompts for human review: {category}...")
    kept = load_kept_prompts(category)
    csv_path = export_to_csv(category, kept)
    summary_path = export_summary_md(category, kept)

    # Final report followed by the hand-off instructions for Stage 7.
    report = (
        f"[Stage 6] ✅ Exported {len(kept)} prompts",
        f"[Stage 6] CSV: {csv_path}",
        f"[Stage 6] Summary: {summary_path}",
        "",
        "Next: open the CSV, fill decision column (APPROVE/REJECT/EDIT), save as 'for_human_review_decided.csv',",
        f"then run: python 7_finalize.py --category {category}",
    )
    for line in report:
        print(line)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()