"""Backfill: enqueue publicly-fetchable open missing_precedents for auto-fetch. The citation graph records cited-but-absent rulings in ``missing_precedents``. The ones with a public source — Supreme serial (בג"ץ/בר"מ/עע"מ NNNN/YY) → Tier-0 supremedecisions; district/Supreme with a נט-format triple → Tier-1 נט המשפט — can be fetched + ingested automatically. ועדת-ערר (needs Nevo) and serial cases with no public record are left for the chair. This stamps a ``court_fetch_jobs`` row for each fetchable gap; the court-fetch drainer (``drain_court_fetch.py`` / pm2 cron) then fetches, ingests, and closes the gap. Idempotent (upsert on the canonical case number). scripts/backfill_missing_precedents.py # dry-run (report only) scripts/backfill_missing_precedents.py --apply # enqueue """ import asyncio import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src")) from legal_mcp.services import court_citation, db async def main() -> int: apply = "--apply" in sys.argv gaps = await db.list_missing_precedents(status="open", limit=2000) enq = skipped = 0 by_tier: dict[str, int] = {} for g in gaps: cit = court_citation.classify(g.get("citation", "")) net = bool(cit.file_number and cit.month and cit.year) # Fetchable: Supreme serial (Tier-0) or anything with a נט triple (Tier-1). if cit.tier == "supreme" or (cit.tier == "admin" and net): route = "Tier-0/supreme" if (cit.tier == "supreme" and not net) else "Tier-1/net" by_tier[route] = by_tier.get(route, 0) + 1 if apply: await db.court_fetch_job_upsert( case_number_norm=cit.case_number_norm, citation_raw=g.get("citation", ""), tier=cit.tier, court=cit.court_prefix, ) enq += 1 else: skipped += 1 verb = "enqueued" if apply else "would enqueue" print(f"{verb}: {enq} (routes: {by_tier})", flush=True) print(f"skipped (ועדת-ערר/serial-no-record/unrecognized): {skipped}", flush=True) if not apply: print("dry-run — re-run with --apply to enqueue.", flush=True) return 0 if __name__ == "__main__": sys.exit(asyncio.run(main()))