"""Tri-model panel extraction regime (legal-principles-redesign, #152). The shared core (G2) for BOTH the going-forward extractor (Phase B) and the retroactive cull (Phase C). chaim 2026-06-19: 1. THREE models (Claude local + DeepSeek + Gemini) deep-analyze a decision and each PROPOSES candidate principles, each with a 0-1 score. 2. Candidates are matched ACROSS models by embedding cosine → a "merged candidate" carries: votes (# distinct models that proposed it) and score (mean of the voters' scores). 3. Approval rule: votes == 3 → approved (even if score < floor) votes >= 2 AND score >= SCORE_FLOOR → approved votes == 2 AND score < SCORE_FLOOR → pending_review (chair, G10) votes <= 1 → rejected (dropped) 4. The CALLER applies the corpus-dedup (V41 link → frees a slot) and the MAX_NEW cap (top-N approved-new by score). This module is corpus-agnostic and DB-free so it is unit-testable and reused identically by B and C. Terminology (#152): a principle from a binding higher court is a הלכה; one from the appeals committee (internal_committee) is a כלל פרשני (interpretive rule) — the committee applies law, it does not make binding precedent. The extract prompt adapts to ``source_kind`` and, for the committee, demands genuine novelty. """ from __future__ import annotations import logging import math import httpx from legal_mcp import config from legal_mcp.services import embeddings, panel_judges logger = logging.getLogger(__name__) _RULE_TYPES = ("holding", "interpretive", "procedural") # citable kinds only def _extract_system(source_kind: str, is_binding: bool, max_candidates: int) -> str: if source_kind == "internal_committee": nature = ( "המקור הוא החלטת ועדת-ערר. ועדת ערר מיישמת דין קיים ואינה יוצרת הלכה מחייבת. " "חלץ אך ורק כללים פרשניים חדשים לגמרי שהוועדה גיבשה — לא יישום של הלכה ידועה, " "לא חזרה על דין מוכר, ולא תיאור עובדות. אם אין כלל פרשני חדש אמיתי — החזר []." ) elif is_binding: nature = ( "המקור הוא פסק-דין של בית-משפט מחוזי/עליון. חלץ הלכות — כללים משפטיים " "בני-הכללה והסתמכות שהפסק קובע או מאמץ ומיישם." ) else: nature = ( "המקור הוא פסיקה משכנעת (לא-מחייבת). חלץ עקרונות משפטיים בני-הכללה בלבד." ) return ( "אתה משפטן בכיר בוועדת ערר לתכנון ובנייה, מנתח פסיקה לבסיס-ידע בר-ציטוט. " f"{nature}\n\n" "כללי-ברזל:\n" "• רק עיקרון כללי בר-הכללה והסתמכות — לא החלה תלוית-עובדות/צדדים/סכומים, " "לא אמרת-אגב (סוגיה שלא הוכרעה), לא חזרה מילולית על הציטוט ללא הפשטה.\n" "• כל עיקרון חייב עיגון: ציטוט מילולי מהמקור התומך בו (INV-AH).\n" f"• החזר עד {max_candidates} המועמדים החזקים ביותר בלבד; מוטב מעט ואיכותי.\n\n" "פלט — JSON array בלבד, ללא markdown:\n" "[{\n" ' "rule_statement": "<העיקרון, כללי ובלתי-תלוי-תיק>",\n' ' "supporting_quote": "<ציטוט מילולי מהמקור>",\n' ' "reasoning_summary": "<מדוע זה עיקרון בר-הסתמכות>",\n' ' "rule_type": "holding|interpretive|procedural",\n' ' "score": 0.0-1.0\n' "}]\n" "אם אין עקרונות ראויים — החזר []." ) def _coerce_list(reply) -> list[dict]: """A judge may return a list, or {"principles":[...]}/{"items":[...]}, or junk.""" if isinstance(reply, list): items = reply elif isinstance(reply, dict): for k in ("principles", "items", "halachot", "results", "candidates"): if isinstance(reply.get(k), list): items = reply[k] break else: items = [reply] if reply.get("rule_statement") else [] else: return [] out = [] for it in items: if not isinstance(it, dict): continue rule = (it.get("rule_statement") or "").strip() quote = (it.get("supporting_quote") or "").strip() if not rule or not quote: continue rt = (it.get("rule_type") or "interpretive").strip().lower() try: score = float(it.get("score", 0.0)) except (TypeError, ValueError): score = 0.0 out.append({ "rule_statement": rule, "supporting_quote": quote, "reasoning_summary": (it.get("reasoning_summary") or "").strip(), "rule_type": rt if rt in _RULE_TYPES else "interpretive", "score": max(0.0, min(1.0, score)), }) return out def _cosine(a: list[float], b: list[float]) -> float: dot = sum(x * y for x, y in zip(a, b)) na = math.sqrt(sum(x * x for x in a)) nb = math.sqrt(sum(y * y for y in b)) return 0.0 if na == 0 or nb == 0 else dot / (na * nb) def classify(votes: int, score: float) -> str: """The chair's approval rule → 'approved' | 'pending_review' | 'rejected'.""" floor = config.HALACHA_PANEL_SCORE_FLOOR if votes >= 3: return "approved" if votes == 2: return "approved" if score >= floor else "pending_review" return "rejected" def apply_cap(judged: list[dict], max_new: int | None = None) -> list[dict]: """Per-decision cap for the retroactive cull (#152, Phase C). ``judged`` = a decision's principles, each with a panel ``verdict`` + ``score``. Survivors (approved/pending_review) are ranked by score; those beyond ``max_new`` are downgraded to 'rejected' (over-cap). Already-rejected stay rejected. Returns a new list with ``final_verdict`` set on each (order preserved). Pure. """ max_new = config.HALACHA_PANEL_MAX_NEW if max_new is None else max_new survivors = [j for j in judged if j.get("verdict") in ("approved", "pending_review")] survivors.sort(key=lambda j: j.get("score", 0.0), reverse=True) keep_ids = {id(j) for j in survivors[:max_new]} out = [] for j in judged: v = j.get("verdict") if v in ("approved", "pending_review") and id(j) not in keep_ids: final = "rejected" # over the cap else: final = v out.append({**j, "final_verdict": final}) return out def cluster_candidates( per_model: dict[str, list[dict]], embs: dict[int, list[float]], ) -> list[dict]: """Greedy cross-model clustering. ``per_model`` maps judge→its candidate list; ``embs`` maps id(candidate)→embedding. Each cluster merges near-duplicate proposals: votes = # distinct models present, score = mean of each model's BEST score in the cluster, representative = highest-scoring member. Pure (no I/O) given the embeddings — unit-testable. """ match = config.HALACHA_PANEL_MATCH_COSINE clusters: list[dict] = [] # deterministic order: model order, then model-local order flat: list[tuple[str, dict]] = [] for m in panel_judges.JUDGE_NAMES: for c in per_model.get(m, []): flat.append((m, c)) for model, cand in flat: emb = embs.get(id(cand)) placed = False if emb is not None: for cl in clusters: if cl["_emb"] is not None and _cosine(cl["_emb"], emb) >= match: cl["members"].append({"model": model, **cand}) prev = cl["per_model_score"].get(model, -1.0) cl["per_model_score"][model] = max(prev, cand["score"]) if cand["score"] > cl["score_rep"]: cl["score_rep"] = cand["score"] cl["rule_statement"] = cand["rule_statement"] cl["supporting_quote"] = cand["supporting_quote"] cl["reasoning_summary"] = cand["reasoning_summary"] cl["rule_type"] = cand["rule_type"] cl["_emb"] = emb placed = True break if not placed: clusters.append({ "rule_statement": cand["rule_statement"], "supporting_quote": cand["supporting_quote"], "reasoning_summary": cand["reasoning_summary"], "rule_type": cand["rule_type"], "members": [{"model": model, **cand}], "per_model_score": {model: cand["score"]}, "score_rep": cand["score"], "_emb": emb, }) out = [] for cl in clusters: pms = cl["per_model_score"] votes = len(pms) score = sum(pms.values()) / votes if votes else 0.0 out.append({ "rule_statement": cl["rule_statement"], "supporting_quote": cl["supporting_quote"], "reasoning_summary": cl["reasoning_summary"], "rule_type": cl["rule_type"], "votes": votes, "score": round(score, 4), "voters": sorted(pms.keys()), "verdict": classify(votes, score), "embedding": cl["_emb"], }) # strongest first out.sort(key=lambda c: (c["votes"], c["score"]), reverse=True) return out def _keep_score_system(source_kind: str, is_binding: bool) -> str: if source_kind == "internal_committee": nature = ("המקור הוא החלטת ועדת-ערר (מיישמת דין, אינה יוצרת הלכה). ראוי-לשמירה = " "כלל פרשני חדש ובר-הכללה שהוועדה גיבשה; לא-ראוי = יישום תלוי-עובדות, " "חזרה על דין מוכר, אמרת-אגב, או חזרה מילולית על הציטוט.") else: nature = ("ראוי-לשמירה = עיקרון משפטי בר-הכללה והסתמכות (הלכה/פרשנות/כלל-פרוצדורלי); " "לא-ראוי = החלה תלוית-עובדות, אמרת-אגב, או חזרה מילולית על הציטוט.") return ( "אתה משפטן בכיר בוועדת ערר לתכנון ובנייה. הוכרע אם עיקרון שחולץ מפסיקה ראוי " f"להישמר כתקדים בר-ציטוט. {nature}\n" "תן גם ציון-ביטחון 0-1 לכך שזהו עיקרון בר-הסתמכות אמיתי.\n" 'החזר JSON בלבד: {"keep": true/false, "score": 0.0-1.0, "reason": "<משפט קצר>"}. ללא markdown.' ) async def panel_keep_score( rule_statement: str, supporting_quote: str, reasoning_summary: str = "", *, source_kind: str = "external_upload", is_binding: bool = True, ) -> dict: """Run the 3-judge panel on ONE existing principle (Phase C cull, #152). Each judge votes keep + score; votes = # keepers, score = mean of the keepers' scores (chaim: "ממוצע המצביעים"), verdict via the shared :func:`classify`. Returns {votes, score, verdict, voters, per_judge} — per_judge keeps raw replies for the active-learning round (FU-1). Used by the retroactive cull; the extractor uses :func:`panel_extract` instead. """ import asyncio system = _keep_score_system(source_kind, is_binding) user = (f"ניסוח העיקרון:\n{rule_statement}\n\n" f"היגיון:\n{reasoning_summary}\n\nציטוט תומך:\n{supporting_quote}") async with httpx.AsyncClient() as client: c, ds, gm = await asyncio.gather( panel_judges.judge_claude(system, user, max_tokens=300), panel_judges.judge_deepseek(client, system, user, max_tokens=300), panel_judges.judge_gemini(client, system, user, max_tokens=2000), ) raw = {"claude": c, "deepseek": ds, "gemini": gm} keepers, scores = [], [] for name, reply in raw.items(): if panel_judges.to_bool(reply, "keep"): keepers.append(name) try: scores.append(max(0.0, min(1.0, float(reply.get("score", 0.0))))) except (TypeError, ValueError): scores.append(0.0) votes = len(keepers) score = round(sum(scores) / votes, 4) if votes else 0.0 return {"votes": votes, "score": score, "verdict": classify(votes, score), "voters": sorted(keepers), "per_judge": raw} async def _run_three(system: str, user: str, max_tokens: int) -> dict[str, object]: async with httpx.AsyncClient() as client: import asyncio c, ds, gm = await asyncio.gather( panel_judges.judge_claude(system, user, max_tokens=max_tokens), panel_judges.judge_deepseek(client, system, user, max_tokens=max_tokens), panel_judges.judge_gemini(client, system, user, max_tokens=max_tokens), ) return {"claude": c, "deepseek": ds, "gemini": gm} async def panel_extract( text: str, *, source_kind: str = "external_upload", is_binding: bool = True, propose_n: int | None = None, ) -> list[dict]: """Run the 3-model panel over a decision's text → merged candidate principles. Returns clusters (strongest first), each: {rule_statement, supporting_quote, reasoning_summary, rule_type, votes, score, voters, verdict, embedding} Does NOT dedup vs the corpus and does NOT apply the MAX_NEW cap — the caller (extractor / cull) owns those (they need DB + differ B vs C). """ propose_n = propose_n if propose_n is not None else config.HALACHA_PANEL_MAX_NEW + 3 system = _extract_system(source_kind, is_binding, propose_n) user = f"--- תחילת המקור ---\n{text}\n--- סוף המקור ---" replies = await _run_three(system, user, max_tokens=8000) per_model: dict[str, list[dict]] = {} for name in panel_judges.JUDGE_NAMES: per_model[name] = _coerce_list(replies.get(name)) if not any(per_model.values()): logger.warning("panel_extract: all three judges returned no candidates") return [] # embed every candidate's rule_statement for cross-model matching flat = [c for m in panel_judges.JUDGE_NAMES for c in per_model[m]] embs: dict[int, list[float]] = {} if flat: vecs = await embeddings.embed_texts([c["rule_statement"] for c in flat]) for c, v in zip(flat, vecs): embs[id(c)] = list(v) return cluster_candidates(per_model, embs)