#!/usr/bin/env python3 """Safety-net audit for panel-approved halachot (selective-prediction monitoring). A panel auto-approval is reversible and low-harm, but not infallible. The literature (Trust-or-Escalate; selective prediction) prescribes MONITORING the auto-decision error rate over time rather than trusting it blindly. This samples panel-approved halachot, RE-RUNS the same 3-judge KEEP vote, and surfaces any where the panel now leans DROP — the candidate false-keeps a human should glance at. Zero standing load on the chair: it just produces a short weekly list. Report-only by default. ``--flag`` sends the flips back to ``pending_review`` (with an audit reviewer note) so they re-enter the chair queue. cd ~/legal-ai/mcp-server .venv/bin/python ../scripts/halacha_panel_audit.py --sample 15 .venv/bin/python ../scripts/halacha_panel_audit.py --sample 15 --flag """ from __future__ import annotations import argparse import asyncio import httpx from legal_mcp.services import db from halacha_panel_approve import ( # noqa: E402 — single source of truth for judges KEEP_SYSTEM, _bool, _keep_user, judge_claude, judge_deepseek, judge_gemini, ) def _majority(votes: list[bool]) -> bool | None: vs = [v for v in votes if v is not None] if len(vs) < 2: return None y, n = sum(vs), len(vs) - sum(vs) return True if y > n else (False if n > y else None) async def main(args: argparse.Namespace) -> int: pool = await db.get_pool() # sample panel-approved halachot (ORDER BY random is fine for a small audit) rows = await pool.fetch( "SELECT h.id, h.rule_statement, h.reasoning_summary, h.supporting_quote, " " cl.case_number " "FROM halachot h LEFT JOIN case_law cl ON cl.id = h.case_law_id " "WHERE h.review_status='approved' AND h.reviewer LIKE 'panel:%' " "ORDER BY md5(h.id::text || $1) LIMIT $2", args.seed, args.sample, ) print(f"auditing {len(rows)} panel-approved halachot (re-running the KEEP vote)\n", flush=True) flips = [] sem = asyncio.Semaphore(args.concurrency) async with httpx.AsyncClient() as client: async def one(r): async with sem: user = _keep_user(dict(r)) c, ds, gm = await asyncio.gather( judge_claude(KEEP_SYSTEM, user), judge_deepseek(client, KEEP_SYSTEM, user), judge_gemini(client, KEEP_SYSTEM, user), ) votes = [_bool(c, "keep"), _bool(ds, "keep"), _bool(gm, "keep")] if _majority(votes) is False: # panel now leans DROP → candidate false-keep flips.append((r, votes)) tasks = [one(r) for r in rows] for i in range(0, len(tasks), args.concurrency): await asyncio.gather(*tasks[i : i + args.concurrency]) rate = len(flips) / len(rows) if rows else 0.0 print(f"=== AUDIT: {len(flips)}/{len(rows)} now lean DROP ({rate:.0%} candidate false-keeps) ===") for r, votes in flips: print(f"\n {r['case_number']} votes(c/ds/gm)={votes}") print(f" {r['rule_statement'][:140]}") if flips and args.flag: for r, _ in flips: await pool.execute( "UPDATE halachot SET review_status='pending_review', " "reviewer='panel-audit:reopened', updated_at=now() WHERE id=$1", r["id"]) print(f"\n→ flagged {len(flips)} back to pending_review for chair review.") elif flips: print("\n(report-only — pass --flag to reopen these for the chair)") return 0 if __name__ == "__main__": ap = argparse.ArgumentParser() ap.add_argument("--sample", type=int, default=15) ap.add_argument("--seed", default="audit", help="vary to draw a different sample") ap.add_argument("--flag", action="store_true", help="reopen flips to pending_review") ap.add_argument("--concurrency", type=int, default=6) raise SystemExit(asyncio.run(main(ap.parse_args())))