feat(halacha): panel safety-net audit (selective-prediction monitoring)
Periodic safety net for the multi-judge approval panel: samples panel-approved halachot, re-runs the same 3-judge KEEP vote, and surfaces any that now lean DROP — candidate false-keeps a human should glance at. Report-only by default; --flag reopens flips to pending_review. Baseline 0/15 on the 2026-06-07 batch. Closes the loop the literature prescribes (Trust-or-Escalate / selective prediction): monitor the auto-decision error rate rather than trusting it blindly. Reuses halacha_panel_approve's judges (single source of truth). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
93
scripts/halacha_panel_audit.py
Normal file
93
scripts/halacha_panel_audit.py
Normal file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Safety-net audit for panel-approved halachot (selective-prediction monitoring).
|
||||
|
||||
A panel auto-approval is reversible and low-harm, but not infallible. The
|
||||
literature (Trust-or-Escalate; selective prediction) prescribes MONITORING the
|
||||
auto-decision error rate over time rather than trusting it blindly. This samples
|
||||
panel-approved halachot, RE-RUNS the same 3-judge KEEP vote, and surfaces any
|
||||
where the panel now leans DROP — the candidate false-keeps a human should glance
|
||||
at. Zero standing load on the chair: it just produces a short weekly list.
|
||||
|
||||
Report-only by default. ``--flag`` sends the flips back to ``pending_review``
|
||||
(with an audit reviewer note) so they re-enter the chair queue.
|
||||
|
||||
cd ~/legal-ai/mcp-server
|
||||
.venv/bin/python ../scripts/halacha_panel_audit.py --sample 15
|
||||
.venv/bin/python ../scripts/halacha_panel_audit.py --sample 15 --flag
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
|
||||
import httpx
|
||||
|
||||
from legal_mcp.services import db
|
||||
from halacha_panel_approve import ( # noqa: E402 — single source of truth for judges
|
||||
KEEP_SYSTEM, _bool, _keep_user, judge_claude, judge_deepseek, judge_gemini,
|
||||
)
|
||||
|
||||
|
||||
def _majority(votes: list[bool]) -> bool | None:
|
||||
vs = [v for v in votes if v is not None]
|
||||
if len(vs) < 2:
|
||||
return None
|
||||
y, n = sum(vs), len(vs) - sum(vs)
|
||||
return True if y > n else (False if n > y else None)
|
||||
|
||||
|
||||
async def main(args: argparse.Namespace) -> int:
    """Re-run the KEEP vote on a sample of panel-approved halachot and report flips.

    A "flip" is a sampled halacha whose re-run votes give a DROP majority
    (``_majority`` returns ``False``). Flips are printed; when ``args.flag`` is
    set they are also sent back to ``pending_review``.

    Args:
        args: Parsed CLI options. Reads ``sample`` (maximum rows to audit),
            ``seed`` (string mixed into the ordering hash, so the same seed
            draws the same sample), ``concurrency`` (maximum halachot judged
            at once) and ``flag`` (reopen flips instead of only reporting).

    Returns:
        Always ``0``.
    """
    pool = await db.get_pool()
    # Seeded sample: ordering by md5(id || seed) is repeatable for a given seed
    # and shuffles differently when the seed changes. Fine for a small audit.
    rows = await pool.fetch(
        "SELECT h.id, h.rule_statement, h.reasoning_summary, h.supporting_quote, "
        " cl.case_number "
        "FROM halachot h LEFT JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE h.review_status='approved' AND h.reviewer LIKE 'panel:%' "
        "ORDER BY md5(h.id::text || $1) LIMIT $2",
        args.seed, args.sample,
    )
    print(f"auditing {len(rows)} panel-approved halachot (re-running the KEEP vote)\n", flush=True)

    # The semaphore is the only throttle, so it must admit at least one row;
    # a zero value would block every task forever.
    sem = asyncio.Semaphore(max(1, args.concurrency))

    async with httpx.AsyncClient() as client:

        async def one(r):
            """Collect the three judges' KEEP votes for a single halacha row."""
            async with sem:
                user = _keep_user(dict(r))
                c, ds, gm = await asyncio.gather(
                    judge_claude(KEEP_SYSTEM, user),
                    judge_deepseek(client, KEEP_SYSTEM, user),
                    judge_gemini(client, KEEP_SYSTEM, user),
                )
            return [_bool(c, "keep"), _bool(ds, "keep"), _bool(gm, "keep")]

        # One gather bounded by the semaphore: a slow row no longer stalls a
        # whole batch, and results come back in row order.
        all_votes = await asyncio.gather(*(one(r) for r in rows))

    verdicts = [_majority(votes) for votes in all_votes]
    # Panel now leans DROP -> candidate false-keep.
    flips = [
        (r, votes)
        for r, votes, verdict in zip(rows, all_votes, verdicts)
        if verdict is False
    ]
    # Rows without a usable majority are not flips, but they must be visible:
    # otherwise a judge outage would read as a clean audit.
    inconclusive = sum(verdict is None for verdict in verdicts)

    rate = len(flips) / len(rows) if rows else 0.0
    print(f"=== AUDIT: {len(flips)}/{len(rows)} now lean DROP ({rate:.0%} candidate false-keeps) ===")
    if inconclusive:
        print(f"  ({inconclusive} inconclusive re-votes (no usable majority); not counted as flips)")
    for r, votes in flips:
        print(f"\n {r['case_number']} votes(c/ds/gm)={votes}")
        # rule_statement may be NULL; slicing None would raise TypeError.
        print(f" {(r['rule_statement'] or '')[:140]}")

    if flips and args.flag:
        reopened = 0
        for r, _ in flips:
            # Only reopen rows that are still panel-approved, so a decision
            # made while the judges were running is never overwritten.
            updated = await pool.fetch(
                "UPDATE halachot SET review_status='pending_review', "
                "reviewer='panel-audit:reopened', updated_at=now() "
                "WHERE id=$1 AND review_status='approved' AND reviewer LIKE 'panel:%' "
                "RETURNING id",
                r["id"],
            )
            reopened += len(updated)
        print(f"\n→ flagged {reopened} back to pending_review for chair review.")
    elif flips:
        print("\n(report-only — pass --flag to reopen these for the chair)")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: parse the options, run the audit, exit with its result.
    parser = argparse.ArgumentParser()
    parser.add_argument("--sample", type=int, default=15)
    parser.add_argument("--seed", default="audit", help="vary to draw a different sample")
    parser.add_argument("--flag", action="store_true", help="reopen flips to pending_review")
    parser.add_argument("--concurrency", type=int, default=6)
    cli_args = parser.parse_args()
    exit_code = asyncio.run(main(cli_args))
    raise SystemExit(exit_code)
|
||||
Reference in New Issue
Block a user