{
  "task": "Within-carrier kappa across PDF and TXT sources for the same carrier \u2014 corrected (no drop rule)",
  "n_carriers_with_both_sources": 192,
  "n_carriers_analyzed": 189,
  "sector_grid": "20 NAICS-2 sectors, deterministic keyword matching (see compute_analysis_a_v2.py)",
  "category_set": [
    "in",
    "out"
  ],
  "drop_rule": "NONE \u2014 all 20 sectors per carrier are kept (standard 2x2 kappa)",
  "bootstrap_iters": 1000,
  "random_seed": 20260518,
  "confusion_matrix": {
    "both_in": 384,
    "pdf_in_txt_out": 888,
    "pdf_out_txt_in": 210,
    "both_out": 2298
  },
  "pooled": {
    "n_cells": 3780,
    "raw_observed_agreement": 0.7095,
    "raw_disagreement_rate": 0.2905,
    "pdf_marginal_in_rate": 0.3365,
    "txt_marginal_in_rate": 0.1571,
    "pdf_to_txt_coverage_ratio": 2.1414,
    "expected_agreement_by_chance": 0.6121,
    "cohens_kappa": 0.2511,
    "kappa_ci_lo_95": 0.2216,
    "kappa_ci_hi_95": 0.2821,
    "gwets_ac1": 0.5376,
    "ac1_ci_lo_95": 0.5101,
    "ac1_ci_hi_95": 0.5652,
    "landis_koch": "fair (0.21-0.40)"
  },
  "methodology_note": "An earlier internal computation (analysis-a-study1.json) applied a drop rule that excluded both-out cells. That rule removed 2,298 of the 3,780 cells (60.8%), every one of them a cell on which PDF and TXT agree (both 'out'). Discarding those agreements lowered observed agreement far more than chance-expected agreement, which biased Cohen's kappa downward and produced kappa=-0.30. The corrected analysis here retains all cells and reports kappa=+0.25 (fair agreement)."
}