citee-methodology/methodology.json

{
  "version": "1.0.0",
  "released": "2026-05-03",
  "name": "Citee Index Methodology",
  "description": "Public methodology for Citee Index — the first open public ranking of brand visibility in AI search results (ChatGPT, Perplexity, Gemini, Claude).",
  "license": "MIT",
  "repository": "https://git.lmwcommerce.com/citee/citee-methodology",
  "mirror": "https://github.com/lmwcommerce/citee-methodology",
  "homepage": "https://citee.ai/methodology",

  "philosophy": {
    "approach": "algorithm-first",
    "principles": [
      "Open methodology, public versioning (every change committed publicly)",
      "Reproducibility — anyone can replicate scores from raw query log",
      "No pay-to-play — ranked brands never pay Citee directly. Hard rule in ToS.",
      "Subjective opinion disclaimer — scores are expressions of opinion based on observed AI model outputs (First Amendment shield, Gartner v. NetScout 2020)",
      "No retroactive changes — methodology updates apply to FUTURE cycles only (FIDE 2024 backlash lesson)",
      "Confidence intervals — overlapping CIs reported as 'tied', no false precision",
      "Annual transparency report — manipulation patterns detected, anti-gaming actions taken"
    ]
  },

  "scoring": {
    "formula": "CiteeScore(brand, category, country, month) = sum(mention_score_per_model * model_weight) * (1 + cross_signal_bonus)",
    "normalization": "Raw score 0-120 normalized to 0-100 per category (top brand = 100, others proportional)",
    "ranking": "Sort by CiteeScore descending. Brands with overlapping confidence intervals reported as tied."
  },

  "models": {
    "weighting_basis": "Each model weighted by its share of AI search traffic per region. Weights revised quarterly using 3 public data sources (OpenRouter rankings, Similarweb free tier, Statcounter/IAB Polska/Mobirank reports) plus first-party Free Checker telemetry.",
    "weights": {
      "PL": {
        "chatgpt": {
          "weight": 0.45,
          "model_version": "gpt-4o-search-2026-04",
          "rationale": "Largest user share PL based on OpenRouter + Similarweb data"
        },
        "perplexity": {
          "weight": 0.25,
          "model_version": "sonar-pro-2026-03",
          "rationale": "Growing power user segment, search-native architecture"
        },
        "gemini": {
          "weight": 0.20,
          "model_version": "gemini-2.0-pro",
          "rationale": "Google embed + AI Overviews coverage"
        },
        "claude": {
          "weight": 0.10,
          "model_version": "claude-sonnet-2026-q1",
          "rationale": "Niche but growing, added Q4 2026 in pilot",
          "status": "added_q4_2026"
        }
      }
    },
    "pilot_models": ["chatgpt", "perplexity", "gemini"],
    "claude_addition_planned": "2026-Q4"
  },

  "mention_score_per_model": {
    "formula": "mention_score = (position * 0.4) + (prominence * 0.3) + (sentiment * 0.15) + (citation_depth * 0.15)",
    "range": "0.0 - 1.0",
    "components": {
      "position": {
        "weight": 0.4,
        "scale": {
          "rank_1": 1.0,
          "rank_2": 0.7,
          "rank_3": 0.5,
          "rank_4_to_10": 0.3,
          "not_mentioned": 0.0
        }
      },
      "prominence": {
        "weight": 0.3,
        "scale": {
          "passing_mention": 0.3,
          "listed_with_description": 0.6,
          "actively_recommended": 1.0
        }
      },
      "sentiment": {
        "weight": 0.15,
        "scale": {
          "positive": 0.2,
          "neutral": 0.0,
          "negative_or_caveated": -0.3
        }
      },
      "citation_depth": {
        "weight": 0.15,
        "scale": {
          "direct_link_to_brand_site": 1.0,
          "mention_only_no_link": 0.5
        }
      }
    }
  },

  "prompt_types": {
    "rationale": "Different prompt types reflect different stages of buyer funnel. Buying intent prompts weighted higher because they correlate with revenue impact.",
    "weights": {
      "buying": {
        "weight": 2.0,
        "examples_pattern": "Where to buy [category] premium / Best place to buy [category]",
        "share_of_pool": "30%"
      },
      "comparison": {
        "weight": 1.5,
        "examples_pattern": "Best [category] / Top [category] handmade / [Brand A] vs [Brand B]",
        "share_of_pool": "25%"
      },
      "specific_need": {
        "weight": 1.5,
        "examples_pattern": "[Category] with [specific attribute] / [Category] for [specific use case]",
        "share_of_pool": "20%"
      },
      "informational": {
        "weight": 0.3,
        "examples_pattern": "What is [category] / How does [category] work",
        "share_of_pool": "15%"
      },
      "brand_direct": {
        "weight": 0.3,
        "examples_pattern": "[Brand X] reviews / Opinions about [Brand X]",
        "share_of_pool": "10%"
      }
    },
    "pool_size_per_category": 100,
    "pool_rotation": "20% of prompts rotate quarterly. Distribution by type published. Exact strings remain CLOSED to prevent Goodhart's Law (when a measure becomes a target, it ceases to be a measure)."
  },

  "cross_signals": {
    "rationale": "Cross-signals provide reality check — does the brand exist outside AI training data? Brand with high AI score but zero cross-signals may indicate content spam farm rather than real entity.",
    "max_total_bonus": 0.20,
    "signals": {
      "wikidata_entry": {
        "bonus": 0.05,
        "criteria": "Brand has Wikidata entry, minimum 5 triples (instance_of, country, founder OR founded_date, official_website, ISNI), entry age >= 90 days",
        "anti_gaming": "Entries < 90 days old excluded to prevent rapid-deployment manipulation"
      },
      "trustpilot_or_opineo": {
        "bonus": 0.05,
        "criteria": "Reviews count > 50, average rating > 4.0, no review bombing detected (review burst > 50 in 30 days = excluded)"
      },
      "reddit_organic_mentions": {
        "bonus": 0.05,
        "criteria": "Organic mentions in niche subreddit > 10, account_age + karma weighted, sock puppet detection applied (new accounts < 30 days excluded)"
      },
      "google_ai_overviews_presence": {
        "bonus": 0.05,
        "criteria": "Brand cited in Google AI Overviews response for at least one tracked prompt in category, verified via SerpAPI"
      }
    }
  },

  "anti_gaming": {
    "public_thresholds": {
      "rank_jump_flag": "Brand jumping > 30 ranks in single cycle triggers anomaly review and one-cycle score freeze",
      "fresh_wikidata_excluded": "< 90 days",
      "review_bombing_excluded": "> 50 reviews in 30 days from new accounts",
      "sock_puppet_excluded": "Reddit accounts < 30 days old or karma < threshold"
    },
    "private_thresholds": {
      "rationale": "Specific burst detection thresholds, sock puppet karma cutoffs, and pattern matching rules remain CLOSED to prevent gaming. Available to legal/regulatory authorities upon request.",
      "categories": [
        "burst_detection_thresholds",
        "sock_puppet_karma_cutoffs",
        "review_bombing_pattern_signatures",
        "prompt_injection_detection_signatures"
      ]
    },
    "honeypot_brand": {
      "active": true,
      "rationale": "Fictional brand inserted at predetermined ranking position to detect AI training data circular logic and unauthorized scraping. If model cites honeypot brand, evidence of training on Citee data without attribution.",
      "details": "CLOSED — disclosure would defeat purpose"
    },
    "prompt_injection_defense": {
      "scrape_filters": [
        "Strip CSS hidden text (display:none, visibility:hidden, color:white-on-white)",
        "Strip off-screen positioned content (left:-9999px, etc.)",
        "Strip font-size:0 and opacity:0 elements",
        "Detect and exclude content in noscript that contradicts visible content"
      ],
      "consequence": "Brands using prompt injection excluded from current cycle + publicly named in annual transparency report"
    }
  },

  "statistical_methodology": {
    "queries_per_cycle": {
      "prompts_per_category": 100,
      "models": "3 in pilot (ChatGPT, Perplexity, Gemini), 4 from Q4 2026 (+ Claude)",
      "repetitions_per_prompt": 2,
      "total_per_category_per_cycle": "100 * 3 * 2 = 600 (pilot), 100 * 4 * 2 = 800 (post Q4 2026)"
    },
    "confidence_intervals": "95% CI computed via bootstrap resampling. Brands with overlapping CIs reported as tied — no false precision.",
    "minimum_brands_per_category": 20,
    "tied_score_handling": "If CI(A) overlaps CI(B), both reported at same rank with '=' indicator"
  },

  "scan_cadence": {
    "tier_1_large_markets": {
      "frequency": "monthly",
      "criteria": ">1000 brands visible, >100M PLN GMV"
    },
    "tier_2_medium_markets": {
      "frequency": "quarterly",
      "criteria": "100-1000 brands, 10-100M PLN GMV"
    },
    "tier_3_niche_markets": {
      "frequency": "semi-annually",
      "criteria": "<100 brands, <10M PLN GMV"
    },
    "current_pilot_tier": "all categories in pilot are Tier 2 (quarterly)"
  },

  "publication_policy": {
    "validation_period_before_first_publication": "3 months / 3 cycles minimum",
    "first_public_ranking": "August 2026 (target)",
    "format": "Hybrid — Top 10 public HTML (SEO indexed), full ranking 100 brands as PDF behind email gate",
    "ai_crawler_policy": {
      "robots_txt_disallow": ["GPTBot", "ClaudeBot", "PerplexityBot", "CCBot", "Google-Extended"],
      "endpoints_protected": ["/api/ranking-full", "/index/*/full.pdf"],
      "rationale": "Prevents AI training data circular logic. Hybrid approach (top 10 public, ogon protected) balances SEO with measurement integrity."
    },
    "right_to_reply": "Each brand profile page includes 'Brand response' section. Brands can submit response (moderated for factual accuracy) within 30 days of cycle publication."
  },

  "monetization_policy": {
    "ranked_brands_pay_zero": true,
    "rationale": "Issuer-pays model fundamentally compromises ranking credibility (Moody's $864M settlement, Forbes 30 Under 30 fraud roundup). Citee Index revenue comes from indirect channels only.",
    "approved_revenue_sources": [
      "Citee Pro SaaS (199-449 PLN/mo) — paid by shops optimizing their visibility, NOT by ranked brands",
      "Industry Reports (999-2999 PLN/quarter) — paid by agencies, media, market research firms",
      "Sponsored Custom Research (9990-29990 PLN) — commissioned by media/agency for category research, NOT brand-specific"
    ],
    "prohibited": [
      "Brand profile upgrades (paid premium listing)",
      "Verified badges (annual fee for ranking participation)",
      "Awards sponsored by ranked brands",
      "Any direct payment from ranked entity to Citee"
    ]
  },

  "categories_pilot_2026": {
    "country": "PL",
    "tier": "Tier 2 (quarterly scan)",
    "list": [
      "kosmetyki-naturalne",
      "suplementy-nutricosmetyki",
      "diety-pudelkowe",
      "premium-pet-food",
      "kawa-specialty",
      "czekolada-rzemieslnicza",
      "kursy-programowania-bootcampy",
      "kliniki-estetyczne-dermo",
      "fitness-studios-premium",
      "kosmetyki-meskie",
      "swiece-sojowe"
    ],
    "expansion_plan": {
      "Q3_2026": "Add Tier 1 PL categories (kosmetyki ogólne, odzież dziecięca, dom & ogród, elektronika audio, biuro)",
      "Q4_2026": "DACH expansion — pilot 5 categories DE",
      "2027_Q1": "CEE expansion (CZ, SK, HU, RO)"
    }
  },

  "changelog_reference": "See CHANGELOG.md for version history. Methodology evolves through public commits with rationale. NO retroactive changes — modifications apply to FUTURE cycles only."
}