#!/usr/bin/env python3 """Retroactive cull of the legal-principles corpus via the 3-model panel (#152, Phase C). The corpus grew to ~5,243 principles (18.8/decision) under the old single-model auto-approve. This re-adjudicates EVERY existing 'original' principle with the SAME regime the extractor now uses going forward (chaim 2026-06-19): • 3 judges (Claude local + DeepSeek + Gemini) vote keep + score on each principle. • Approval rule: 3 votes→survive · 2 & score≥0.85→survive · 2 & <0.85→chair (pending_review) · ≤1→reject. • Per DECISION, survivors are capped to HALACHA_PANEL_MAX_NEW (=5) by score; the rest are rejected (over-cap). All logic is shared with the extractor via services/panel_extraction (G2). The cull is REVERSIBLE: a CSV backup of every (id, old_status) is written before any write, and a rejected principle's canonical is also set 'rejected' (recoverable). Throttled by usage_limits (stops gracefully at the soft ceiling, resumable). cd ~/legal-ai/mcp-server HOME=/home/chaim .venv/bin/python ../scripts/cull_principles.py --sample 5 # dry-run, 5 decisions HOME=/home/chaim .venv/bin/python ../scripts/cull_principles.py --dry-run # all, dry-run HOME=/home/chaim .venv/bin/python ../scripts/cull_principles.py --apply # full, throttled """ from __future__ import annotations import argparse import asyncio import csv import os import random import sys from collections import Counter from datetime import datetime, timezone from uuid import UUID sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src")) from legal_mcp import config # noqa: E402 from legal_mcp.services import db, panel_extraction as pe # noqa: E402 try: from legal_mcp.services import usage_limits except Exception: # pragma: no cover usage_limits = None AUDIT_DIR = os.path.join(os.path.dirname(__file__), "..", "data", "audit") _JUDGE_CONCURRENCY = 4 async def _decisions(limit, sample): """case_law ids that have 'original' principles, with source metadata.""" pool = await db.get_pool() rows = await pool.fetch( "SELECT cl.id, cl.case_number, cl.source_kind, cl.is_binding, " " count(*) AS n " "FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id " "WHERE h.instance_type = 'original' AND h.review_status <> 'rejected' " "GROUP BY cl.id, cl.case_number, cl.source_kind, cl.is_binding " "ORDER BY n DESC", ) items = [dict(r) for r in rows] if sample and sample < len(items): items = random.sample(items, sample) if limit: items = items[:limit] return items async def _principles(case_law_id): pool = await db.get_pool() rows = await pool.fetch( "SELECT id, rule_statement, supporting_quote, reasoning_summary, " " canonical_id, review_status " "FROM halachot WHERE case_law_id = $1 AND instance_type = 'original' " "AND review_status <> 'rejected' ORDER BY halacha_index", case_law_id, ) return [dict(r) for r in rows] def _throttled(): if usage_limits is None: return False, "no usage_limits" u = usage_limits.subscription_usage() if u is None: return False, "usage read failed" over, _r, detail = usage_limits.ceiling_status(u) return over, detail async def _judge_decision(dec, sem): principles = await _principles(dec["id"]) if not principles: return [] async def one(p): async with sem: v = await pe.panel_keep_score( p["rule_statement"], p["supporting_quote"], p.get("reasoning_summary") or "", source_kind=dec["source_kind"] or "external_upload", is_binding=bool(dec["is_binding"]), ) return {**p, **v} judged = await asyncio.gather(*[one(p) for p in principles]) return pe.apply_cap(list(judged)) async def _apply_decision(judged, reviewer): pool = await db.get_pool() async with pool.acquire() as conn: async with conn.transaction(): for j in judged: fv = j["final_verdict"] if fv == "approved": await conn.execute( "UPDATE halachot SET review_status='approved', reviewed_at=now(), " "reviewer=$2, updated_at=now() WHERE id=$1", j["id"], reviewer) elif fv == "pending_review": await conn.execute( "UPDATE halachot SET review_status='pending_review', reviewer=$2, " "updated_at=now() WHERE id=$1", j["id"], reviewer) else: # rejected — also reject its canonical (reversible) await conn.execute( "UPDATE halachot SET review_status='rejected', reviewed_at=now(), " "reviewer=$2, updated_at=now() WHERE id=$1", j["id"], reviewer) if j.get("canonical_id"): await conn.execute( "UPDATE canonical_halachot SET review_status='rejected', " "updated_at=now() WHERE id=$1", j["canonical_id"]) async def _run(apply, limit, sample, throttle, verbose): decisions = await _decisions(limit, sample) mode = "APPLY" if apply else "DRY-RUN" print(f"[{mode}] {len(decisions)} decisions with principles " f"(throttle={'on' if throttle else 'off'})\n", flush=True) if not decisions: print("nothing to do.") return 0 stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") os.makedirs(AUDIT_DIR, exist_ok=True) audit = os.path.join(AUDIT_DIR, f"principle-cull-{'apply' if apply else 'dryrun'}-{stamp}.csv") reviewer = f"cull:panel v{config.HALACHA_PANEL_SCORE_FLOOR} cap{config.HALACHA_PANEL_MAX_NEW}" sem = asyncio.Semaphore(_JUDGE_CONCURRENCY) tally = Counter() n_in = n_out = 0 stopped = False with open(audit, "w", newline="", encoding="utf-8") as fh: w = csv.writer(fh) w.writerow(["case_number", "halacha_id", "old_status", "final_verdict", "votes", "score", "canonical_id", "rule"]) for k, dec in enumerate(decisions, 1): if throttle: over, detail = _throttled() if over: print(f"\n⏸ usage ceiling ({detail}) — stopping at {k-1}/{len(decisions)}. " f"Re-run to resume.", flush=True) stopped = True break judged = await _judge_decision(dec, sem) survivors = sum(1 for j in judged if j["final_verdict"] in ("approved", "pending_review")) n_in += len(judged) n_out += survivors for j in judged: tally[j["final_verdict"]] += 1 w.writerow([dec["case_number"], str(j["id"]), j["review_status"], j["final_verdict"], j["votes"], j["score"], str(j.get("canonical_id") or ""), (j["rule_statement"] or "")[:160]]) if apply and judged: await _apply_decision(judged, reviewer) print(f"[{k}/{len(decisions)}] {dec['case_number']:<16} " f"{len(judged)}→{survivors} survive", flush=True) if verbose: for j in judged: mark = {"approved": "✓", "pending_review": "→chair", "rejected": "✗"}[j["final_verdict"]] print(f" {mark} v={j['votes']} s={j['score']} {(j['rule_statement'] or '')[:80]}") print(f"\n── {mode} summary{' (stopped early)' if stopped else ''} ──") print(f" principles judged: {n_in} → survive: {n_out} ({n_in - n_out} rejected)") for v, c in tally.most_common(): print(f" {v:<16} {c}") print(f"\naudit CSV: {audit}") if not apply: print("dry-run — no DB writes. Re-run with --apply to commit (reversible).") return 0 def main(): p = argparse.ArgumentParser(description="Retroactive principle cull via 3-model panel (#152)") p.add_argument("--apply", action="store_true", help="write verdicts (reversible, CSV-backed)") p.add_argument("--dry-run", action="store_true", help="explicit dry-run (default)") p.add_argument("--limit", type=int, default=None) p.add_argument("--sample", type=int, default=None, help="random sample of N decisions") p.add_argument("--no-throttle", action="store_true") p.add_argument("--verbose", action="store_true") a = p.parse_args() return asyncio.run(_run(a.apply, a.limit, a.sample, not a.no_throttle, a.verbose)) if __name__ == "__main__": raise SystemExit(main())