Foundational public methodology for the first open public ranking of brand visibility in AI search results (ChatGPT, Perplexity, Gemini, Claude). This release establishes the framework — no rankings have been computed or published yet. First scan cycle: late May 2026 (private validation). First public ranking publication target: August 2026, after 3 validation cycles. Includes: - methodology.json: machine-readable formulas, weights, policies - README.md: human-readable overview + open/closed boundary - CHANGELOG.md: versioning policy + v1.0.0 release notes - taxonomy.md: tier system + 11 PL pilot categories - LICENSE: MIT - .gitignore: closed operational data (exact prompts, anti-gaming thresholds) - prompts/README.md: 6-stage prompt curation process - prompts/example-swiece-sojowe-pl.md: illustrative framework for first category Strategic principles: - Algorithm-first, no advisory board - Open methodology + closed exact prompts (Goodhart's Law defense) - No retroactive changes (FIDE 2024 lesson) - No pay-to-play, hard rule (Moody's / Forbes 30 Under 30 lessons) - Subjective opinion disclaimer (Gartner v. NetScout 2020 First Amendment shield) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
270 lines
11 KiB
JSON
270 lines
11 KiB
JSON
{
|
|
"version": "1.0.0",
|
|
"released": "2026-05-03",
|
|
"name": "Citee Index Methodology",
|
|
"description": "Public methodology for Citee Index — the first open public ranking of brand visibility in AI search results (ChatGPT, Perplexity, Gemini, Claude).",
|
|
"license": "MIT",
|
|
"repository": "https://git.lmwcommerce.com/citee/citee-methodology",
|
|
"mirror": "https://github.com/lmwcommerce/citee-methodology",
|
|
"homepage": "https://citee.ai/methodology",
|
|
|
|
"philosophy": {
|
|
"approach": "algorithm-first",
|
|
"principles": [
|
|
"Open methodology, public versioning (every change committed publicly)",
|
|
"Reproducibility — anyone can replicate scores from raw query log",
|
|
"No pay-to-play — ranked brands never pay Citee directly. Hard rule in ToS.",
|
|
"Subjective opinion disclaimer — scores are expressions of opinion based on observed AI model outputs (First Amendment shield, Gartner v. NetScout 2020)",
|
|
"No retroactive changes — methodology updates apply to FUTURE cycles only (FIDE 2024 backlash lesson)",
|
|
"Confidence intervals — overlapping CIs reported as 'tied', no false precision",
|
|
"Annual transparency report — manipulation patterns detected, anti-gaming actions taken"
|
|
]
|
|
},
|
|
|
|
"scoring": {
|
|
"formula": "CiteeScore(brand, category, country, month) = sum(mention_score_per_model * model_weight) * (1 + cross_signal_bonus)",
|
|
"normalization": "Raw score (0-1.2 from the formula, expressed as 0-120 on a percentage scale) is normalized to 0-100 per category (top brand = 100, others proportional)",
|
|
"ranking": "Sort by CiteeScore descending. Brands with overlapping confidence intervals reported as tied."
|
|
},
|
|
|
|
"models": {
|
|
"weighting_basis": "Each model weighted by its share of AI search traffic per region. Weights revised quarterly using 3 public data sources (OpenRouter rankings, Similarweb free tier, Statcounter/IAB Polska/Mobirank reports) plus first-party Free Checker telemetry.",
|
|
"weights": {
|
|
"PL": {
|
|
"chatgpt": {
|
|
"weight": 0.45,
|
|
"model_version": "gpt-4o-search-2026-04",
|
|
"rationale": "Largest user share PL based on OpenRouter + Similarweb data"
|
|
},
|
|
"perplexity": {
|
|
"weight": 0.25,
|
|
"model_version": "sonar-pro-2026-03",
|
|
"rationale": "Growing power user segment, search-native architecture"
|
|
},
|
|
"gemini": {
|
|
"weight": 0.20,
|
|
"model_version": "gemini-2.0-pro",
|
|
"rationale": "Google embed + AI Overviews coverage"
|
|
},
|
|
"claude": {
|
|
"weight": 0.10,
|
|
"model_version": "claude-sonnet-2026-q1",
|
|
"rationale": "Niche but growing; not part of the initial pilot, planned for addition in Q4 2026",
|
|
"status": "added_q4_2026"
|
|
}
|
|
}
|
|
},
|
|
"pilot_models": ["chatgpt", "perplexity", "gemini"],
|
|
"claude_addition_planned": "2026-Q4"
|
|
},
|
|
|
|
"mention_score_per_model": {
|
|
"formula": "mention_score = (position * 0.4) + (prominence * 0.3) + (sentiment * 0.15) + (citation_depth * 0.15)",
|
|
"range": "0.0 - 1.0",
|
|
"components": {
|
|
"position": {
|
|
"weight": 0.4,
|
|
"scale": {
|
|
"rank_1": 1.0,
|
|
"rank_2": 0.7,
|
|
"rank_3": 0.5,
|
|
"rank_4_to_10": 0.3,
|
|
"not_mentioned": 0.0
|
|
}
|
|
},
|
|
"prominence": {
|
|
"weight": 0.3,
|
|
"scale": {
|
|
"passing_mention": 0.3,
|
|
"listed_with_description": 0.6,
|
|
"actively_recommended": 1.0
|
|
}
|
|
},
|
|
"sentiment": {
|
|
"weight": 0.15,
|
|
"scale": {
|
|
"positive": 0.2,
|
|
"neutral": 0.0,
|
|
"negative_or_caveated": -0.3
|
|
}
|
|
},
|
|
"citation_depth": {
|
|
"weight": 0.15,
|
|
"scale": {
|
|
"direct_link_to_brand_site": 1.0,
|
|
"mention_only_no_link": 0.5
|
|
}
|
|
}
|
|
}
|
|
},
|
|
|
|
"prompt_types": {
|
|
"rationale": "Different prompt types reflect different stages of buyer funnel. Buying intent prompts weighted higher because they correlate with revenue impact.",
|
|
"weights": {
|
|
"buying": {
|
|
"weight": 2.0,
|
|
"examples_pattern": "Where to buy [category] premium / Best place to buy [category]",
|
|
"share_of_pool": "30%"
|
|
},
|
|
"comparison": {
|
|
"weight": 1.5,
|
|
"examples_pattern": "Best [category] / Top [category] handmade / [Brand A] vs [Brand B]",
|
|
"share_of_pool": "25%"
|
|
},
|
|
"specific_need": {
|
|
"weight": 1.5,
|
|
"examples_pattern": "[Category] with [specific attribute] / [Category] for [specific use case]",
|
|
"share_of_pool": "20%"
|
|
},
|
|
"informational": {
|
|
"weight": 0.3,
|
|
"examples_pattern": "What is [category] / How does [category] work",
|
|
"share_of_pool": "15%"
|
|
},
|
|
"brand_direct": {
|
|
"weight": 0.3,
|
|
"examples_pattern": "[Brand X] reviews / Opinions about [Brand X]",
|
|
"share_of_pool": "10%"
|
|
}
|
|
},
|
|
"pool_size_per_category": 100,
|
|
"pool_rotation": "20% of prompts rotate quarterly. Distribution by type published. Exact strings remain CLOSED to prevent Goodhart's Law (when a measure becomes a target, it ceases to be a measure)."
|
|
},
|
|
|
|
"cross_signals": {
|
|
"rationale": "Cross-signals provide a reality check — does the brand exist outside AI training data? A brand with a high AI score but zero cross-signals may indicate a content spam farm rather than a real entity.",
|
|
"max_total_bonus": 0.20,
|
|
"signals": {
|
|
"wikidata_entry": {
|
|
"bonus": 0.05,
|
|
"criteria": "Brand has Wikidata entry, minimum 5 triples (instance_of, country, founder OR founded_date, official_website, ISNI), entry age >= 90 days",
|
|
"anti_gaming": "Entries < 90 days old excluded to prevent rapid-deployment manipulation"
|
|
},
|
|
"trustpilot_or_opineo": {
|
|
"bonus": 0.05,
|
|
"criteria": "Reviews count > 50, average rating > 4.0, no review bombing detected (review burst > 50 in 30 days = excluded)"
|
|
},
|
|
"reddit_organic_mentions": {
|
|
"bonus": 0.05,
|
|
"criteria": "Organic mentions in niche subreddit > 10, account_age + karma weighted, sock puppet detection applied (new accounts < 30 days excluded)"
|
|
},
|
|
"google_ai_overviews_presence": {
|
|
"bonus": 0.05,
|
|
"criteria": "Brand cited in Google AI Overviews response for at least one tracked prompt in category, verified via SerpAPI"
|
|
}
|
|
}
|
|
},
|
|
|
|
"anti_gaming": {
|
|
"public_thresholds": {
|
|
"rank_jump_flag": "Brand jumping > 30 ranks in single cycle triggers anomaly review and one-cycle score freeze",
|
|
"fresh_wikidata_excluded": "< 90 days",
|
|
"review_bombing_excluded": "> 50 reviews in 30 days from new accounts",
|
|
"sock_puppet_excluded": "Reddit accounts < 30 days old or karma < threshold"
|
|
},
|
|
"private_thresholds": {
|
|
"rationale": "Specific burst detection thresholds, sock puppet karma cutoffs, and pattern matching rules remain CLOSED to prevent gaming. Available to legal/regulatory authorities upon request.",
|
|
"categories": [
|
|
"burst_detection_thresholds",
|
|
"sock_puppet_karma_cutoffs",
|
|
"review_bombing_pattern_signatures",
|
|
"prompt_injection_detection_signatures"
|
|
]
|
|
},
|
|
"honeypot_brand": {
|
|
"active": true,
|
|
"rationale": "Fictional brand inserted at a predetermined ranking position to detect AI training data circular logic and unauthorized scraping. If a model cites the honeypot brand, that is evidence of training on Citee data without attribution.",
|
|
"details": "CLOSED — disclosure would defeat purpose"
|
|
},
|
|
"prompt_injection_defense": {
|
|
"scrape_filters": [
|
|
"Strip CSS hidden text (display:none, visibility:hidden, color:white-on-white)",
|
|
"Strip off-screen positioned content (left:-9999px, etc.)",
|
|
"Strip font-size:0 and opacity:0 elements",
|
|
"Detect and exclude content in noscript that contradicts visible content"
|
|
],
|
|
"consequence": "Brands using prompt injection excluded from current cycle + publicly named in annual transparency report"
|
|
}
|
|
},
|
|
|
|
"statistical_methodology": {
|
|
"queries_per_cycle": {
|
|
"prompts_per_category": 100,
|
|
"models": "3 in pilot (ChatGPT, Perplexity, Gemini), 4 from Q4 2026 (+ Claude)",
|
|
"repetitions_per_prompt": 2,
|
|
"total_per_category_per_cycle": "100 * 3 * 2 = 600 (pilot), 100 * 4 * 2 = 800 (post Q4 2026)"
|
|
},
|
|
"confidence_intervals": "95% CI computed via bootstrap resampling. Brands with overlapping CIs reported as tied — no false precision.",
|
|
"minimum_brands_per_category": 20,
|
|
"tied_score_handling": "If CI(A) overlaps CI(B), both reported at same rank with '=' indicator"
|
|
},
|
|
|
|
"scan_cadence": {
|
|
"tier_1_large_markets": {
|
|
"frequency": "monthly",
|
|
"criteria": ">1000 brands visible, >100M PLN GMV"
|
|
},
|
|
"tier_2_medium_markets": {
|
|
"frequency": "quarterly",
|
|
"criteria": "100-1000 brands, 10-100M PLN GMV"
|
|
},
|
|
"tier_3_niche_markets": {
|
|
"frequency": "semi-annually",
|
|
"criteria": "<100 brands, <10M PLN GMV"
|
|
},
|
|
"current_pilot_tier": "all categories in pilot are Tier 2 (quarterly)"
|
|
},
|
|
|
|
"publication_policy": {
|
|
"validation_period_before_first_publication": "3 months / 3 cycles minimum",
|
|
"first_public_ranking": "August 2026 (target)",
|
|
"format": "Hybrid — Top 10 public HTML (SEO indexed), full ranking 100 brands as PDF behind email gate",
|
|
"ai_crawler_policy": {
|
|
"robots_txt_disallow": ["GPTBot", "ClaudeBot", "PerplexityBot", "CCBot", "Google-Extended"],
|
|
"endpoints_protected": ["/api/ranking-full", "/index/*/full.pdf"],
|
|
"rationale": "Prevents AI training data circular logic. Hybrid approach (top 10 public, long tail protected) balances SEO with measurement integrity."
|
|
},
|
|
"right_to_reply": "Each brand profile page includes 'Brand response' section. Brands can submit response (moderated for factual accuracy) within 30 days of cycle publication."
|
|
},
|
|
|
|
"monetization_policy": {
|
|
"ranked_brands_pay_zero": true,
|
|
"rationale": "Issuer-pays model fundamentally compromises ranking credibility (Moody's $864M settlement, Forbes 30 Under 30 fraud roundup). Citee Index revenue comes from indirect channels only.",
|
|
"approved_revenue_sources": [
|
|
"Citee Pro SaaS (199-449 PLN/mo) — paid by shops optimizing their visibility, NOT by ranked brands",
|
|
"Industry Reports (999-2999 PLN/quarter) — paid by agencies, media, market research firms",
|
|
"Sponsored Custom Research (9990-29990 PLN) — commissioned by media/agency for category research, NOT brand-specific"
|
|
],
|
|
"prohibited": [
|
|
"Brand profile upgrades (paid premium listing)",
|
|
"Verified badges (annual fee for ranking participation)",
|
|
"Awards sponsored by ranked brands",
|
|
"Any direct payment from ranked entity to Citee"
|
|
]
|
|
},
|
|
|
|
"categories_pilot_2026": {
|
|
"country": "PL",
|
|
"tier": "Tier 2 (quarterly scan)",
|
|
"list": [
|
|
"kosmetyki-naturalne",
|
|
"suplementy-nutricosmetyki",
|
|
"diety-pudelkowe",
|
|
"premium-pet-food",
|
|
"kawa-specialty",
|
|
"czekolada-rzemieslnicza",
|
|
"kursy-programowania-bootcampy",
|
|
"kliniki-estetyczne-dermo",
|
|
"fitness-studios-premium",
|
|
"kosmetyki-meskie",
|
|
"swiece-sojowe"
|
|
],
|
|
"expansion_plan": {
|
|
"Q3_2026": "Add Tier 1 PL categories (kosmetyki ogólne, odzież dziecięca, dom & ogród, elektronika audio, biuro)",
|
|
"Q4_2026": "DACH expansion — pilot 5 categories DE",
|
|
"2027_Q1": "CEE expansion (CZ, SK, HU, RO)"
|
|
}
|
|
},
|
|
|
|
"changelog_reference": "See CHANGELOG.md for version history. Methodology evolves through public commits with rationale. NO retroactive changes — modifications apply to FUTURE cycles only."
|
|
}
|