feat(principles): canonical_statement synthesis service + throttled backfill (Phase E groundwork, #152)

Grounded (INV-AH) multi-instance synthesis with drift guard + chair gate (pending_review, G10). Single path used by backfill, MCP tool, nightly drain. HELD from production run pending the principles-redesign (rename+cull, #152). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 10:57:48 +00:00
parent db93735ed6
commit 338a8a947f
14 changed files with 1250 additions and 74 deletions
--- a/mcp-server/src/legal_mcp/services/panel_extraction.py
+++ b/mcp-server/src/legal_mcp/services/panel_extraction.py
@@ -0,0 +1,243 @@
+"""Tri-model panel extraction regime (legal-principles-redesign, #152).
+
+The shared core (G2) for BOTH the going-forward extractor (Phase B) and the
+retroactive cull (Phase C). chaim 2026-06-19:
+
+  1. THREE models (Claude local + DeepSeek + Gemini) deep-analyze a decision and
+     each PROPOSES candidate principles, each with a 0-1 score.
+  2. Candidates are matched ACROSS models by embedding cosine → a "merged
+     candidate" carries: votes (# distinct models that proposed it) and score
+     (mean of the voters' scores).
+  3. Approval rule:
+        votes == 3                          → approved (even if score < floor)
+        votes >= 2 AND score >= SCORE_FLOOR  → approved
+        votes == 2 AND score <  SCORE_FLOOR  → pending_review (chair, G10)
+        votes <= 1                           → rejected (dropped)
+  4. The CALLER applies the corpus-dedup (V41 link → frees a slot) and the
+     MAX_NEW cap (top-N approved-new by score). This module is corpus-agnostic
+     and DB-free so it is unit-testable and reused identically by B and C.
+
+Terminology (#152): a principle from a binding higher court is a הלכה; one from
+the appeals committee (internal_committee) is a כלל פרשני (interpretive rule) —
+the committee applies law, it does not make binding precedent. The extract prompt
+adapts to ``source_kind`` and, for the committee, demands genuine novelty.
+"""
+from __future__ import annotations
+
+import logging
+import math
+
+import httpx
+
+from legal_mcp import config
+from legal_mcp.services import embeddings, panel_judges
+
+# Module-level logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# Allowed values for a candidate's ``rule_type``; ``_coerce_list`` maps any
+# other value to "interpretive".
+_RULE_TYPES = ("holding", "interpretive", "procedural")  # citable kinds only
+
+
+def _extract_system(source_kind: str, is_binding: bool, max_candidates: int) -> str:
+    """Build the Hebrew system prompt sent to each extraction judge.
+
+    The paragraph describing the source is chosen as follows:
+    ``source_kind == "internal_committee"`` wins regardless of ``is_binding``;
+    otherwise ``is_binding`` selects the binding-court text and anything else
+    gets the persuasive-case-law text. ``max_candidates`` is interpolated into
+    the "return at most N candidates" rule. The prompt asks for a bare JSON
+    array of objects with the keys ``rule_statement``, ``supporting_quote``,
+    ``reasoning_summary``, ``rule_type`` and ``score``.
+    """
+    committee_nature = (
+        "המקור הוא החלטת ועדת-ערר. ועדת ערר מיישמת דין קיים ואינה יוצרת הלכה מחייבת. "
+        "חלץ אך ורק כללים פרשניים חדשים לגמרי שהוועדה גיבשה — לא יישום של הלכה ידועה, "
+        "לא חזרה על דין מוכר, ולא תיאור עובדות. אם אין כלל פרשני חדש אמיתי — החזר []."
+    )
+    binding_nature = (
+        "המקור הוא פסק-דין של בית-משפט מחוזי/עליון. חלץ הלכות — כללים משפטיים "
+        "בני-הכללה והסתמכות שהפסק קובע או מאמץ ומיישם."
+    )
+    persuasive_nature = (
+        "המקור הוא פסיקה משכנעת (לא-מחייבת). חלץ עקרונות משפטיים בני-הכללה בלבד."
+    )
+
+    # The committee check comes first: it overrides the binding flag.
+    if source_kind == "internal_committee":
+        nature = committee_nature
+    else:
+        nature = binding_nature if is_binding else persuasive_nature
+
+    # Pieces are concatenated verbatim, with no separator added.
+    sections = [
+        "אתה משפטן בכיר בוועדת ערר לתכנון ובנייה, מנתח פסיקה לבסיס-ידע בר-ציטוט. ",
+        f"{nature}\n\n",
+        "כללי-ברזל:\n",
+        "• רק עיקרון כללי בר-הכללה והסתמכות — לא החלה תלוית-עובדות/צדדים/סכומים, ",
+        "לא אמרת-אגב (סוגיה שלא הוכרעה), לא חזרה מילולית על הציטוט ללא הפשטה.\n",
+        "• כל עיקרון חייב עיגון: ציטוט מילולי מהמקור התומך בו (INV-AH).\n",
+        f"• החזר עד {max_candidates} המועמדים החזקים ביותר בלבד; מוטב מעט ואיכותי.\n\n",
+        "פלט — JSON array בלבד, ללא markdown:\n",
+        "[{\n",
+        '  "rule_statement": "<העיקרון, כללי ובלתי-תלוי-תיק>",\n',
+        '  "supporting_quote": "<ציטוט מילולי מהמקור>",\n',
+        '  "reasoning_summary": "<מדוע זה עיקרון בר-הסתמכות>",\n',
+        '  "rule_type": "holding|interpretive|procedural",\n',
+        '  "score": 0.0-1.0\n',
+        "}]\n",
+        "אם אין עקרונות ראויים — החזר [].",
+    ]
+    return "".join(sections)
+
+
+def _as_text(value) -> str:
+    """Return ``value`` stripped when it is a string, otherwise ``""``."""
+    return value.strip() if isinstance(value, str) else ""
+
+
+def _coerce_list(reply) -> list[dict]:
+    """Normalise one judge's raw reply into a clean list of candidate dicts.
+
+    Accepted shapes:
+      * a list of candidate objects;
+      * a dict wrapping such a list under ``principles``, ``items``,
+        ``halachot``, ``results`` or ``candidates`` (first match wins);
+      * a single candidate dict (recognised by a truthy ``rule_statement``).
+    Anything else yields ``[]``.
+
+    Non-dict items, and items without both a ``rule_statement`` and a
+    ``supporting_quote``, are dropped. Model output is untrusted, so a text
+    field that is not a string counts as missing instead of raising.
+    ``rule_type`` outside ``_RULE_TYPES`` becomes ``"interpretive"``. ``score``
+    is clamped to [0, 1]; unparseable or NaN scores become 0.0.
+
+    Returns:
+        Dicts with exactly the keys ``rule_statement``, ``supporting_quote``,
+        ``reasoning_summary``, ``rule_type`` and ``score``.
+    """
+    if isinstance(reply, list):
+        items = reply
+    elif isinstance(reply, dict):
+        for k in ("principles", "items", "halachot", "results", "candidates"):
+            if isinstance(reply.get(k), list):
+                items = reply[k]
+                break
+        else:
+            items = [reply] if reply.get("rule_statement") else []
+    else:
+        return []
+    out = []
+    for it in items:
+        if not isinstance(it, dict):
+            continue
+        rule = _as_text(it.get("rule_statement"))
+        quote = _as_text(it.get("supporting_quote"))
+        if not rule or not quote:
+            continue
+        rt = _as_text(it.get("rule_type")).lower()
+        try:
+            score = float(it.get("score", 0.0))
+        except (TypeError, ValueError, OverflowError):
+            score = 0.0
+        # max(0.0, min(1.0, nan)) evaluates to 1.0, so NaN must be rejected
+        # before clamping or a garbage score becomes the maximum score.
+        if math.isnan(score):
+            score = 0.0
+        out.append({
+            "rule_statement": rule,
+            "supporting_quote": quote,
+            "reasoning_summary": _as_text(it.get("reasoning_summary")),
+            "rule_type": rt if rt in _RULE_TYPES else "interpretive",
+            "score": max(0.0, min(1.0, score)),
+        })
+    return out
+
+
+def _cosine(a: list[float], b: list[float]) -> float:
+    """Return the cosine similarity of ``a`` and ``b``.
+
+    Yields 0.0 when either vector has zero norm (including empty vectors).
+    The dot product pairs elements with ``zip``, so it stops at the shorter
+    vector; each norm is taken over that vector's full length.
+    """
+    norm_a = math.sqrt(sum(v * v for v in a))
+    norm_b = math.sqrt(sum(v * v for v in b))
+    if norm_a == 0 or norm_b == 0:
+        return 0.0
+    inner = sum(p * q for p, q in zip(a, b))
+    return inner / (norm_a * norm_b)
+
+
+def classify(votes: int, score: float) -> str:
+    """Apply the approval rule to a merged candidate.
+
+    Returns ``"approved"`` for three or more votes regardless of score; for
+    exactly two votes, ``"approved"`` when ``score`` is at least
+    ``config.HALACHA_PANEL_SCORE_FLOOR`` and ``"pending_review"`` otherwise;
+    ``"rejected"`` for any other vote count.
+    """
+    threshold = config.HALACHA_PANEL_SCORE_FLOOR
+    if votes >= 3:
+        return "approved"
+    if votes != 2:
+        return "rejected"
+    if score >= threshold:
+        return "approved"
+    return "pending_review"
+
+
+def cluster_candidates(
+    per_model: dict[str, list[dict]], embs: dict[int, list[float]],
+) -> list[dict]:
+    """Greedy cross-model clustering. ``per_model`` maps judge→its candidate list;
+    ``embs`` maps id(candidate)→embedding. Each cluster merges near-duplicate
+    proposals: votes = # distinct models present, score = mean of each model's
+    BEST score in the cluster, representative = highest-scoring member.
+
+    Matching is first-fit: a candidate joins the FIRST existing cluster whose
+    representative embedding has cosine >= ``config.HALACHA_PANEL_MATCH_COSINE``
+    with it, not necessarily the most similar one. A candidate with no entry
+    in ``embs`` always starts its own cluster, and that cluster cannot absorb
+    later candidates because its embedding is None.
+
+    Returns one dict per cluster with the keys ``rule_statement``,
+    ``supporting_quote``, ``reasoning_summary``, ``rule_type``, ``votes``,
+    ``score`` (rounded to 4 places), ``voters`` (sorted model names),
+    ``verdict`` (from ``classify``) and ``embedding``, sorted by
+    (votes, score) descending.
+
+    Pure (no I/O) given the embeddings — unit-testable.
+    """
+    match = config.HALACHA_PANEL_MATCH_COSINE
+    clusters: list[dict] = []
+    # deterministic order: model order, then model-local order
+    flat: list[tuple[str, dict]] = []
+    for m in panel_judges.JUDGE_NAMES:
+        for c in per_model.get(m, []):
+            flat.append((m, c))
+
+    for model, cand in flat:
+        # Embeddings are looked up by object identity, so the dicts in
+        # ``per_model`` must be the same objects that were embedded.
+        emb = embs.get(id(cand))
+        placed = False
+        if emb is not None:
+            for cl in clusters:
+                if cl["_emb"] is not None and _cosine(cl["_emb"], emb) >= match:
+                    cl["members"].append({"model": model, **cand})
+                    # Keep only each model's best score in this cluster, so a
+                    # model proposing the same idea twice still counts once.
+                    prev = cl["per_model_score"].get(model, -1.0)
+                    cl["per_model_score"][model] = max(prev, cand["score"])
+                    # A strictly higher-scoring member becomes the new
+                    # representative; later candidates are then compared
+                    # against ITS embedding.
+                    if cand["score"] > cl["score_rep"]:
+                        cl["score_rep"] = cand["score"]
+                        cl["rule_statement"] = cand["rule_statement"]
+                        cl["supporting_quote"] = cand["supporting_quote"]
+                        cl["reasoning_summary"] = cand["reasoning_summary"]
+                        cl["rule_type"] = cand["rule_type"]
+                        cl["_emb"] = emb
+                    placed = True
+                    break
+        if not placed:
+            # No cluster matched (or no embedding): start a new cluster with
+            # this candidate as its representative.
+            clusters.append({
+                "rule_statement": cand["rule_statement"],
+                "supporting_quote": cand["supporting_quote"],
+                "reasoning_summary": cand["reasoning_summary"],
+                "rule_type": cand["rule_type"],
+                "members": [{"model": model, **cand}],
+                "per_model_score": {model: cand["score"]},
+                "score_rep": cand["score"],
+                "_emb": emb,
+            })
+
+    out = []
+    for cl in clusters:
+        pms = cl["per_model_score"]
+        votes = len(pms)
+        score = sum(pms.values()) / votes if votes else 0.0
+        out.append({
+            "rule_statement": cl["rule_statement"],
+            "supporting_quote": cl["supporting_quote"],
+            "reasoning_summary": cl["reasoning_summary"],
+            "rule_type": cl["rule_type"],
+            "votes": votes,
+            "score": round(score, 4),
+            "voters": sorted(pms.keys()),
+            # The verdict uses the unrounded mean, not the rounded "score".
+            "verdict": classify(votes, score),
+            "embedding": cl["_emb"],
+        })
+    # strongest first
+    out.sort(key=lambda c: (c["votes"], c["score"]), reverse=True)
+    return out
+
+
+async def _run_three(system: str, user: str, max_tokens: int) -> dict[str, object]:
+    """Query the three judges concurrently and return their raw replies.
+
+    The DeepSeek and Gemini judges share one ``httpx.AsyncClient``; the Claude
+    judge is called without it.
+
+    Returns:
+        ``{"claude": ..., "deepseek": ..., "gemini": ...}`` holding whatever
+        each judge returned.
+
+    Raises:
+        The exception of the first failed judge (in claude, deepseek, gemini
+        order), raised only after all three have finished.
+    """
+    import asyncio  # function-scope: the module does not import asyncio
+
+    async with httpx.AsyncClient() as client:
+        # return_exceptions=True makes gather wait for every judge to settle.
+        # With the default, the first failure propagates at once while the
+        # other requests keep running, and leaving this ``async with`` would
+        # close the shared client underneath them.
+        results = await asyncio.gather(
+            panel_judges.judge_claude(system, user, max_tokens=max_tokens),
+            panel_judges.judge_deepseek(client, system, user, max_tokens=max_tokens),
+            panel_judges.judge_gemini(client, system, user, max_tokens=max_tokens),
+            return_exceptions=True,
+        )
+
+    replies: dict[str, object] = dict(zip(("claude", "deepseek", "gemini"), results))
+    failures = {
+        name: res for name, res in replies.items() if isinstance(res, BaseException)
+    }
+    for name, exc in failures.items():
+        logger.warning("panel judge %s failed: %r", name, exc)
+    if failures:
+        # Keep the original contract: a judge failure still fails the call.
+        raise next(iter(failures.values()))
+    return replies
+
+
+async def panel_extract(
+    text: str,
+    *,
+    source_kind: str = "external_upload",
+    is_binding: bool = True,
+    propose_n: int | None = None,
+) -> list[dict]:
+    """Run the 3-model panel over a decision's text → merged candidate principles.
+
+    ``propose_n`` is the per-model candidate limit written into the prompt; it
+    defaults to ``config.HALACHA_PANEL_MAX_NEW + 3``.
+
+    Returns clusters (strongest first), each:
+      {rule_statement, supporting_quote, reasoning_summary, rule_type,
+       votes, score, voters, verdict, embedding}
+    or ``[]`` (with a warning logged) when no judge produced a usable candidate.
+    Neither corpus dedup nor the MAX_NEW cap is applied here; both are left to
+    the caller (extractor / cull).
+    """
+    if propose_n is None:
+        propose_n = config.HALACHA_PANEL_MAX_NEW + 3
+    prompt = _extract_system(source_kind, is_binding, propose_n)
+    wrapped = f"--- תחילת המקור ---\n{text}\n--- סוף המקור ---"
+    raw = await _run_three(prompt, wrapped, max_tokens=8000)
+
+    per_model: dict[str, list[dict]] = {
+        judge: _coerce_list(raw.get(judge)) for judge in panel_judges.JUDGE_NAMES
+    }
+    if not any(per_model.values()):
+        logger.warning("panel_extract: all three judges returned no candidates")
+        return []
+
+    # Embed every candidate's rule_statement for cross-model matching. At
+    # least one candidate exists past the guard above, so the list is non-empty.
+    candidates: list[dict] = []
+    for judge in panel_judges.JUDGE_NAMES:
+        candidates.extend(per_model[judge])
+    vectors = await embeddings.embed_texts([c["rule_statement"] for c in candidates])
+    # Keyed by object identity, as cluster_candidates expects.
+    embs: dict[int, list[float]] = {
+        id(c): list(v) for c, v in zip(candidates, vectors)
+    }
+    return cluster_candidates(per_model, embs)