legal-ai/scripts/halacha_rubric_distill.py

#!/usr/bin/env python3
"""Distil a better panel rubric from the chair's decisions — PROPOSE-ONLY (#133/FU-4).

The 3-judge KEEP panel (halacha_panel_approve.py) escalates every split to the
chair. FU-1 captured each round's votes+reasons; FU-2 captured the chair's
keep/drop ruling as a gold seed. This job joins the two — (panel ⋈ chair) — and
mines SYSTEMATIC failures: a judge that disagrees with the chair on an axis, a
recurring split the chair resolves the same way (e.g. obiter↔interpretive). It
then proposes a refined ``KEEP_SYSTEM`` v2 + abstract few-shot exemplars, written
as a DIFF report for the chair to review.

CRITICAL — this is the ACTIVE-LEARNING signal, not an echo chamber:
  - The only ground-truth is the chair's human ruling (db.panel_rounds_vs_chair
    reads the chair-live gold seeds, never the panel's own votes).
  - The proposal is NEVER auto-applied (INV-LRN1). KEEP_SYSTEM lives in code;
    adopting v2 is a human edit through a normal PR. This script writes a report
    to data/learning/ and touches nothing else.
  - Exemplars stay ABSTRACT patterns, never copied case holdings (INV-LRN5).

Usage:

    cd ~/legal-ai/mcp-server
    .venv/bin/python ../scripts/halacha_rubric_distill.py            # propose
    .venv/bin/python ../scripts/halacha_rubric_distill.py --no-llm   # stats only
"""
from __future__ import annotations

import argparse
import asyncio
import difflib
import json
from datetime import datetime, timezone
from pathlib import Path

from legal_mcp.services import claude_session, db
# single source of truth for the rubric under refinement
from halacha_panel_approve import KEEP_SYSTEM  # noqa: E402

# Minimum analysis["n_pairs"] that main() requires before asking the model for
# a proposal. Below this many chair-resolved pairs the patterns are noise, so
# the run reports stats only and stops.
MIN_PAIRS: int = 12
# Upper bound on the evidence entries analyze_pairs() returns; the same list is
# what the model prompt enumerates (keeps the prompt + report tight).
MAX_EVIDENCE: int = 24

# Judge names. Each pair is read through the "<judge>_vote" and
# "<judge>_reason" keys built from these names.
_JUDGES: tuple[str, ...] = ("claude", "deepseek", "gemini")


def analyze_pairs(pairs: list[dict]) -> dict:
    """Mine the (panel ⋈ chair) pairs for systematic failures — no DB, no LLM.

    Pure and deterministic. A pair whose ``chair_keep`` is ``None`` carries no
    ground truth and is ignored entirely: it contributes to no counter,
    including ``n_pairs``, so the ``MIN_PAIRS`` gate measures chair-resolved
    pairs only.

    Args:
        pairs: One dict per panel round joined with the chair's ruling. Keys
            read here: ``chair_keep``, ``verdict``, ``applied_action``,
            ``rule_statement`` and, for every judge in ``_JUDGES``,
            ``<judge>_vote`` and ``<judge>_reason``. Missing keys are tolerated.

    Returns:
        A dict with:
          - ``n_pairs``: number of pairs that have a chair ruling.
          - ``judge_stats``: per judge, ``voted`` / ``agree`` / ``disagree``
            counts and ``disagree_rate`` (rounded to 3 decimals, ``None`` when
            the judge never voted).
          - ``n_false_keep``: pairs the panel kept (``approved`` /
            ``nli_cleared``) that the chair dropped.
          - ``n_false_drop``: pairs the panel ``rejected`` that the chair kept.
          - ``n_splits_resolved``: pairs whose verdict was ``split`` or
            ``incomplete``.
          - ``evidence``: false keeps, then false drops, then splits, without
            duplicates, capped at ``MAX_EVIDENCE`` entries.
    """
    judge_stats = {j: {"voted": 0, "agree": 0, "disagree": 0} for j in _JUDGES}
    false_keep: list[dict] = []   # panel auto-KEPT, chair DROPPED
    false_drop: list[dict] = []   # panel auto-DROPPED, chair KEPT
    splits_resolved: list[dict] = []
    n_resolved = 0
    for p in pairs:
        chair = p.get("chair_keep")
        if chair is None:
            continue
        n_resolved += 1
        # Normalise once so the agreement stats and the false-keep/false-drop
        # classification always judge against the same truth value (a ruling
        # stored as 0/1 must not be counted in one and missed in the other).
        chair_keep = bool(chair)
        votes = {j: p.get(f"{j}_vote") for j in _JUDGES}
        for j, v in votes.items():
            if v is None:
                continue
            stats = judge_stats[j]
            stats["voted"] += 1
            stats["agree" if bool(v) == chair_keep else "disagree"] += 1
        action = (p.get("applied_action") or "").strip()
        verdict = (p.get("verdict") or "").strip()
        ev = {
            "rule_statement": p.get("rule_statement") or "",
            "verdict": verdict,
            "applied_action": action,
            "chair_keep": chair_keep,
            "reasons": {j: p.get(f"{j}_reason") or "" for j in _JUDGES},
            "votes": votes,
        }
        # Panel acted automatically (kept) but the chair disagreed → dangerous.
        if action in ("approved", "nli_cleared") and not chair_keep:
            false_keep.append(ev)
        elif action == "rejected" and chair_keep:
            false_drop.append(ev)
        if verdict in ("split", "incomplete"):
            splits_resolved.append(ev)

    for s in judge_stats.values():
        s["disagree_rate"] = round(s["disagree"] / s["voted"], 3) if s["voted"] else None

    # Evidence the model needs to see: every false auto-decision (highest value)
    # then chair-resolved splits, capped. A pair can be both a false
    # auto-decision and a split; list it once so it does not burn two slots.
    evidence: list[dict] = []
    seen: set[int] = set()
    for ev in (*false_keep, *false_drop, *splits_resolved):
        if id(ev) not in seen:
            seen.add(id(ev))
            evidence.append(ev)
    evidence = evidence[:MAX_EVIDENCE]

    return {
        "n_pairs": n_resolved,
        "judge_stats": judge_stats,
        "n_false_keep": len(false_keep),
        "n_false_drop": len(false_drop),
        "n_splits_resolved": len(splits_resolved),
        "evidence": evidence,
    }


def _proposal_prompt(analysis: dict) -> str:
    """Assemble the model prompt from the current rubric and the failure evidence.

    Args:
        analysis: The dict produced by ``analyze_pairs``; this function reads
            ``evidence``, the four counters and each judge's ``disagree_rate``.

    Returns:
        One prompt string: instructions, the current ``KEEP_SYSTEM`` text, the
        failure statistics, a numbered evidence list (or a placeholder when the
        list is empty) and the JSON schema the model must answer in.
    """

    def _entry(idx: int, item: dict) -> str:
        # One numbered evidence item: chair ruling, panel verdict, votes, rule, reasons.
        vote_txt = ", ".join(f"{j}={item['votes'][j]}" for j in _JUDGES)
        # Judges that gave no reason are left out of the reasons line.
        reason_txt = " | ".join(
            f"{j}: {item['reasons'][j]}" for j in _JUDGES if item["reasons"][j]
        )
        ruling = "שמירה" if item["chair_keep"] else "דחייה"
        verdict = item["verdict"]
        action = item["applied_action"] or "הוסלם"
        rule = item["rule_statement"][:200]  # keep long rules from bloating the prompt
        return (
            f"{idx}. הכרעת-יו\"ר: {ruling} | "
            f"ורדיקט-פאנל: {verdict} ({action}) | "
            f"הצבעות: {vote_txt}\n   כלל: {rule}\n   נימוקי-שופטים: {reason_txt}"
        )

    entries = [_entry(idx, item) for idx, item in enumerate(analysis["evidence"], 1)]
    evidence_block = "\n".join(entries) or "(אין מספיק ראיות-מחלוקת)"

    intro = (
        "להלן רובריקת-ההכרעה הנוכחית של פאנל-שופטים שמסווג 'הלכות' שחולצו מפסיקה "
        "כראויות-לשמירה (keep) או לא. מצורפים מקרים שבהם השופטים נחלקו או טעו ביחס "
        "להכרעת-היו\"ר (האמת היחידה). זהה את **דפוסי-הכשל השיטתיים** והצע שיפור מינימלי "
        "לרובריקה.\n\n"
    )
    rubric_section = f"## הרובריקה הנוכחית (KEEP_SYSTEM)\n{KEEP_SYSTEM}\n\n"
    stats_section = (
        f"## סטטיסטיקת-כשל\n"
        f"זוגות: {analysis['n_pairs']} | false-keep: {analysis['n_false_keep']} | "
        f"false-drop: {analysis['n_false_drop']} | פיצולים-שהוכרעו: {analysis['n_splits_resolved']}\n"
        f"שיעור-מחלוקת-עם-היו\"ר לכל שופט: "
    )
    rate_txt = ", ".join(
        f"{j}={analysis['judge_stats'][j]['disagree_rate']}" for j in _JUDGES
    )
    evidence_section = f"\n\n## ראיות-מחלוקת\n{evidence_block}\n\n"
    # Plain (non-f) literals: the braces below are the literal JSON schema.
    answer_schema = (
        "החזר JSON בלבד (ללא markdown) בסכמה:\n"
        '{"patterns": ["<דפוס-כשל שיטתי 1>", ...], '
        '"keep_system_v2": "<נוסח מלא מוצע לרובריקה — מופשט, בר-הכללה, בלי מהות-תיק>", '
        '"exemplars": [{"pattern":"<תבנית מופשטת>","label":"keep|drop","why":"<קצר>"}]}\n'
        "אזהרה: ה-exemplars והנוסח חייבים להיות **מופשטים** — אסור להעתיק ניסוח-כלל "
        "ספציפי או מהות-תיק (INV-LRN5). אם הראיות לא מספיקות לדפוס ברור — החזר "
        '{"patterns": [], "keep_system_v2": "", "exemplars": []}.'
    )
    return "".join(
        (intro, rubric_section, stats_section, rate_txt, evidence_section, answer_schema)
    )


def _render_report(analysis: dict, proposal: dict | None, ts: str) -> str:
    """Render the propose-only Markdown report: stats tables plus an optional v2 diff.

    The proposal comes from a model, so its field shapes are not trusted:
    ``keep_system_v2`` may arrive as a list, ``patterns`` as a bare string and
    ``exemplars`` entries as non-dicts. Such values are normalised rather than
    raising, so the deterministic statistics are never lost to a malformed
    proposal. Well-formed proposals render exactly as before.

    Args:
        analysis: The dict produced by ``analyze_pairs``; reads the four
            counters and, per judge, ``voted`` / ``disagree`` / ``disagree_rate``.
        proposal: Parsed model answer, or ``None``. When it is falsy or has no
            truthy ``keep_system_v2`` the report ends with a "no proposal" note.
        ts: Timestamp string shown in the report title.

    Returns:
        The report as a single newline-joined Markdown string.
    """
    js = analysis["judge_stats"]
    lines = [
        f"# הצעת-זיקוק לרובריקת-הפאנל (FU-4) — {ts}",
        "",
        "> **PROPOSE-ONLY (INV-LRN1).** המסמך הזה הוא הצעה לעיון-היו\"ר בלבד. "
        "`KEEP_SYSTEM` חי בקוד (`scripts/halacha_panel_approve.py`); אימוץ v2 = "
        "עריכה אנושית דרך PR רגיל. אף שורת-קוד לא שונתה אוטומטית.",
        "> הסיגנל היחיד = הכרעת-היו\"ר על מחלוקות-הפאנל (לא הצבעות-הפאנל — echo-chamber).",
        "",
        "## סטטיסטיקת-כשל",
        "",
        "| מדד | ערך |",
        "|---|---|",
        f"| זוגות (panel ⋈ chair) | {analysis['n_pairs']} |",
        f"| false-keep (פאנל שמר, יו\"ר דחה) | {analysis['n_false_keep']} |",
        f"| false-drop (פאנל דחה, יו\"ר שמר) | {analysis['n_false_drop']} |",
        f"| פיצולים שהוכרעו ע\"י היו\"ר | {analysis['n_splits_resolved']} |",
        "",
        "### שיעור-מחלוקת-עם-היו\"ר לכל שופט",
        "",
        "| judge | voted | disagree | rate |",
        "|---|---|---|---|",
    ]
    for j in _JUDGES:
        lines.append(f"| {j} | {js[j]['voted']} | {js[j]['disagree']} | {js[j]['disagree_rate']} |")
    lines.append("")

    raw_v2 = proposal.get("keep_system_v2") if proposal else None
    if not raw_v2:
        lines += ["## הצעה", "", "_אין דפוס-כשל מובהק / אין מספיק ראיות — לא הוצעה רובריקה חדשה._", ""]
        return "\n".join(lines)

    # Coerce the proposed rubric to text: a list of sentences is joined line by
    # line, anything else is stringified, so the diff below always gets a str.
    if isinstance(raw_v2, str):
        v2_text = raw_v2
    elif isinstance(raw_v2, (list, tuple)):
        v2_text = "\n".join(str(part) for part in raw_v2)
    else:
        v2_text = str(raw_v2)

    # A bare string must stay one bullet, not be iterated character by character.
    raw_patterns = proposal.get("patterns") or []
    patterns = list(raw_patterns) if isinstance(raw_patterns, (list, tuple)) else [raw_patterns]

    raw_exemplars = proposal.get("exemplars") or []
    exemplars = list(raw_exemplars) if isinstance(raw_exemplars, (list, tuple)) else [raw_exemplars]

    lines += ["## דפוסי-כשל שזוהו", ""]
    lines += [f"- {p}" for p in patterns] or ["- (—)"]
    lines += ["", "## diff מוצע ל-KEEP_SYSTEM", "", "```diff"]
    # Break both texts after each sentence so the diff is per sentence rather
    # than one giant changed line; lineterm="" because the lines carry no "\n".
    diff = difflib.unified_diff(
        KEEP_SYSTEM.replace(". ", ".\n").splitlines(),
        v2_text.replace(". ", ".\n").splitlines(),
        fromfile="KEEP_SYSTEM (current)", tofile="KEEP_SYSTEM (proposed v2)", lineterm="",
    )
    lines += list(diff)
    lines += ["```", "", "## few-shot exemplars מוצעים (מופשטים — INV-LRN5)", ""]
    for ex in exemplars:
        if isinstance(ex, dict):
            lines.append(f"- **{ex.get('label','')}** — {ex.get('pattern','')} _( {ex.get('why','')} )_")
        else:
            # Unexpected shape: show it verbatim instead of dropping it or crashing.
            lines.append(f"- {ex}")
    lines += ["", "---", "_להחלת ההצעה: ערוך ידנית את `KEEP_SYSTEM` ופתח PR. אין auto-apply (INV-LRN1)._"]
    return "\n".join(lines)


async def main(args: argparse.Namespace) -> int:
    """Run one distillation pass and write the propose-only report.

    Loads the (panel ⋈ chair) pairs, prints a one-line summary, asks the model
    for a rubric proposal unless there are fewer than ``MIN_PAIRS`` pairs or
    ``--no-llm`` was given, then writes the Markdown report under
    ``data/learning/`` two directories above this file.

    Args:
        args: Parsed CLI namespace; reads ``limit`` (0 falls back to 2000) and
            ``no_llm``.

    Returns:
        Always 0. A failed model call is reported on stdout and downgrades the
        run to a stats-only report.
    """
    row_cap = args.limit or 2000
    analysis = analyze_pairs(await db.panel_rounds_vs_chair(limit=row_cap))
    summary = (
        f"pairs={analysis['n_pairs']} false_keep={analysis['n_false_keep']} "
        f"false_drop={analysis['n_false_drop']} splits={analysis['n_splits_resolved']}"
    )
    print(summary, flush=True)

    # Stays None on every path that ends in a stats-only report.
    proposal = None
    if analysis["n_pairs"] < MIN_PAIRS:
        print(f"insufficient data (<{MIN_PAIRS} chair-resolved pairs) — no proposal. "
              "Seeds accrue as the chair reviews panel-judged halachot (FU-2).", flush=True)
    elif args.no_llm:
        print("--no-llm: stats only, no rubric proposal.", flush=True)
    else:
        try:
            prompt = _proposal_prompt(analysis)
            proposal = await claude_session.query_json(prompt, system=None, tools="")
        except Exception as exc:
            print(f"LLM proposal failed ({exc}); writing stats-only report.", flush=True)
            proposal = None
        else:
            # Anything other than a JSON object cannot be rendered as a proposal.
            if proposal and not isinstance(proposal, dict):
                proposal = None

    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    learning_dir = Path(__file__).resolve().parent.parent / "data" / "learning"
    learning_dir.mkdir(parents=True, exist_ok=True)
    report_text = _render_report(analysis, proposal, ts)
    report_path = learning_dir / f"rubric-proposal-{ts}.md"
    report_path.write_text(report_text, encoding="utf-8")
    print(f"wrote {report_path}", flush=True)

    has_v2 = bool(proposal and proposal.get("keep_system_v2"))
    if has_v2:
        print("→ rubric v2 PROPOSED — review the diff and apply via PR if sound (INV-LRN1).",
              flush=True)
    return 0


if __name__ == "__main__":
    # CLI entry point: parse the flags, run the async job, exit with its return code.
    parser = argparse.ArgumentParser(
        description="Propose a panel-rubric refinement from chair decisions (FU-4).",
    )
    parser.add_argument(
        "--limit", type=int, default=0, help="max (panel ⋈ chair) pairs to mine",
    )
    parser.add_argument(
        "--no-llm",
        action="store_true",
        help="deterministic stats only, skip the rubric proposal",
    )
    cli_args = parser.parse_args()
    exit_code = asyncio.run(main(cli_args))
    raise SystemExit(exit_code)