fix(halacha): split authority (derived) from rule_role — stop source-conflation (INV-DM7)

The extractor classified rule_type by SOURCE bindingness (higher-court→binding, committee→persuasive) instead of by rule KIND. The gold-set proved it: 'binding' appeared on 19/19 external rulings & 0 committees; 'persuasive' on 13/13 committees & 0 external — only 58% agreement with the human role tags. The two axes (authority vs rule role) were crammed into one enum.

This splits them per INV-DM7:
- authority (binding/persuasive) — DERIVED from case_law.precedent_level (עליון/מנהלי→binding, ועדת_ערר_מחוזית→persuasive), never stored, never LLM-guessed. New helper halacha_quality.derive_authority; surfaced read-only in list_halachot / goldset_list / search results.
- rule_type — now the rule ROLE only: holding/interpretive/procedural/application/obiter. Both extractor prompts unified to this vocabulary; _coerce_halacha no longer defaults rule_type from the source; legacy binding→holding / persuasive→interpretive fold for safety.

UI: authority shown as a separate read-only badge (gold=מחייב / muted=משכנע) across the review queue, precedent detail, and gold-set; the gold-set role selector drops binding/persuasive and adds מהותי (holding).

Migration: scripts/halacha_rule_role_backfill.py re-classifies the 276 pre-split binding/persuasive rows into a genuine role via local claude_session (run after deploy). Gold-set correct_type/ai_correct_type 'binding'→'holding' via SQL.

Sources (≥3, per research-decision policy): OASIS LegalRuleML v1.0 (appliesAuthority/Strength as metadata orthogonal to rule logic) · SemEval-2023 Task 6 LegalEval (rhetorical roles by function, authority kept separate) · Bluebook signals (weight-of-authority is a separate dimension).

Invariants: ESTABLISHES INV-DM7. Upholds G1 (normalize at source — extractor classifies role, system derives authority) and G2 (single source of truth — authority derived, not a parallel stored field).

Tests: 211 pass + new derive_authority/coerce coverage. web-ui build + tsc clean.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 18:18:41 +00:00
parent 955675eb1f
commit 2e33cac043
16 changed files with 407 additions and 92 deletions
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -664,8 +664,10 @@ CREATE TABLE IF NOT EXISTS halachot (
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    halacha_index INTEGER NOT NULL,
    rule_statement TEXT NOT NULL,
-    rule_type TEXT DEFAULT 'binding',
-        -- binding | interpretive | procedural | obiter
+    rule_type TEXT DEFAULT 'interpretive',
+        -- rule ROLE only (INV-DM7): holding | interpretive | procedural |
+        -- application | obiter. authority (binding/persuasive) is DERIVED
+        -- from case_law.precedent_level, never stored here.
    reasoning_summary TEXT DEFAULT '',
    supporting_quote TEXT NOT NULL,
    page_reference TEXT DEFAULT '',
@@ -4052,7 +4054,7 @@ async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
                case_law_id,
                i,
                h["rule_statement"],
-                h.get("rule_type", "binding"),
+                h.get("rule_type", "interpretive"),
                h.get("reasoning_summary", ""),
                h["supporting_quote"],
                h.get("page_reference", ""),
@@ -4193,7 +4195,7 @@ async def store_halachot_for_chunk(
                       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
                               $12, $13, $14, $15, $16, {reviewed_at_clause})""",
                    case_law_id, base + inserted, h["rule_statement"],
-                    h.get("rule_type", "binding"), h.get("reasoning_summary", ""),
+                    h.get("rule_type", "interpretive"), h.get("reasoning_summary", ""),
                    h["supporting_quote"], h.get("page_reference", ""),
                    h.get("practice_areas", []), h.get("subject_tags", []),
                    h.get("cites", []), confidence, h.get("quote_verified", False),
@@ -4299,6 +4301,8 @@ async def list_halachot(
        d = dict(r)
        if d.get("decision_date") is not None:
            d["decision_date"] = d["decision_date"].isoformat()
+        # authority is DERIVED from the source, never stored (INV-DM7)
+        d["authority"] = halacha_quality.derive_authority(d.get("precedent_level"))
        out.append(d)
    if cluster and out:
        await _annotate_clusters(pool, out)
@@ -4721,7 +4725,7 @@ async def goldset_list(batch: str = "default") -> list[dict]:
        "       g.ai_is_holding, g.ai_correct_type, g.ai_rationale, g.ai_generated_at, "
        "       h.rule_statement, h.supporting_quote, h.reasoning_summary, "
        "       h.rule_type, h.confidence, h.quality_flags, h.review_status, "
-        "       cl.case_number, cl.case_name, cl.source_type "
+        "       cl.case_number, cl.case_name, cl.source_type, cl.precedent_level "
        "FROM halacha_goldset g JOIN halachot h ON h.id = g.halacha_id "
        "LEFT JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE g.batch = $1 ORDER BY g.created_at, g.id", batch,
@@ -4735,6 +4739,8 @@ async def goldset_list(batch: str = "default") -> list[dict]:
            d["ai_generated_at"] = d["ai_generated_at"].isoformat()
        if d.get("confidence") is not None:
            d["confidence"] = float(d["confidence"])
+        # authority is DERIVED from the source, never stored (INV-DM7)
+        d["authority"] = halacha_quality.derive_authority(d.get("precedent_level"))
        out.append(d)
    return out

@@ -4792,7 +4798,7 @@ async def goldset_score(batch: str = "default") -> dict:
    for r in labeled:
        rule = r.get("rule_statement") or ""
        quote = r.get("supporting_quote") or ""
-        rtype = r.get("rule_type") or "binding"
+        rtype = r.get("rule_type") or "interpretive"
        qc = r["quote_complete"] if r["quote_complete"] is not None else True
        truly_bad = r["is_holding"] is False
        flags = halacha_quality.compute_quality_flags(rule, quote, "", qc, rtype)
@@ -4990,6 +4996,8 @@ async def search_precedent_library_semantic(
            _conf = float(d.get("confidence") or 0.0)
            d["score"] = float(d["score"]) + max(_conf * 0.06, 0.0)
            d["type"] = "halacha"
+            # authority is DERIVED from the source, never stored (INV-DM7)
+            d["authority"] = halacha_quality.derive_authority(d.get("precedent_level"))
            results.append(d)

    rows = await pool.fetch(chunk_sql, *c_params)
--- a/mcp-server/src/legal_mcp/services/halacha_extractor.py
+++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py
@@ -76,8 +76,12 @@ EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
 # wants to be able to cite "another committee reached the same conclusion"
 # even though it is not binding.
 #
-# The schema's rule_type field accepts six values:
-#   binding | interpretive | procedural | obiter | application | persuasive
+# The prompt branches on is_binding only to choose the EXTRACTION STRATEGY
+# (what to pull, how to phrase) — NOT the rule_type. rule_type is the rule
+# ROLE and uses the SAME five values for both sources (INV-DM7):
+#   holding | interpretive | procedural | application | obiter
+# The authority axis (binding/persuasive) is derived from the source, never
+# a rule_type value — so the model never classifies it.

 HALACHA_EXTRACTION_PROMPT_BINDING = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית של ערכאה עליונה (עליון / מנהלי).

@@ -101,10 +105,12 @@ HALACHA_EXTRACTION_PROMPT_BINDING = """אתה משפטן בכיר המתמחה

 הלכה אחת יכולה לחול על כמה תחומים — practice_areas הוא array ולא string יחיד.

-## סוגי הלכה (rule_type)
- binding — הלכה מחייבת שהוחלה על התיק.
- interpretive — פרשנות סעיף חוק/תכנית שאומצה.
- procedural — כלל פרוצדורלי (סמכות, מועדים, הליכי שמיעה).
+## סוג הכלל (rule_type) — מהות הכלל בלבד, לא סמכות-המקור
+**אל תסווג "מחייב/משכנע"** — דרגת-המחייבות נגזרת אוטומטית מזהות הערכאה. כאן בחר רק את **סוג הכלל**:
+- holding — עיקרון מהותי שהיה הכרחי להכרעה (ה-ratio; מבחן Wambaugh: שלילתו הייתה משנה את התוצאה).
+- interpretive — פרשנות הוראת-חוק/מונח/תכנית שאומצה.
+- procedural — כלל סדר-דין (סמכות, מועדים, זכות-עמידה, מיצוי הליכים, נטל).
+- application — החלת כלל על עובדות התיק (תלוי-עובדות; לרוב לא-הלכה בת-הכללה).
 - obiter — אמרת אגב חשובה (חלץ רק אם משמעותית; סמן confidence נמוך).

 ## פלט נדרש
@@ -112,7 +118,7 @@ HALACHA_EXTRACTION_PROMPT_BINDING = """אתה משפטן בכיר המתמחה
 [
  {
    "rule_statement": "ניסוח הכלל בלשון משפטית מדויקת בגוף שלישי, 1-3 משפטים.",
-    "rule_type": "binding",
+    "rule_type": "holding",
    "reasoning_summary": "תמצית ההיגיון: למה בית המשפט הגיע לכלל הזה (1-2 משפטים).",
    "supporting_quote": "ציטוט מילולי מדויק מהפסק התומך בכלל. חייב להופיע מילה במילה בטקסט הקלט.",
    "page_reference": "פס' 12 / עמ' 8 — ככל שניתן לזהות מהקלט.",
@@ -139,11 +145,11 @@ HALACHA_EXTRACTION_PROMPT_PERSUASIVE = """אתה משפטן בכיר המתמח

 המקור הזה **אינו** מקור להלכות מחייבות חדשות (binding rules). הלכות מחייבות מגיעות מהעליון/מנהלי. עם זאת, יש כאן ערך משמעותי שצריך לחלץ — איך הפנל הזה ניתח ויישם את הדין הקיים. כשנכתוב החלטה עתידית, נצטט מהמקור הזה כ"גם ועדת הערר ב-X הגיעה למסקנה דומה" — לא כסמכות מחייבת, אלא כתמיכה משכנעת.

-**יש לחלץ:**
+**יש לחלץ** (סווג לפי **סוג הכלל** בלבד — אל תסווג "מחייב/משכנע", דרגת-המחייבות נגזרת אוטומטית):
 - **יישום של הלכה ידועה** (rule_type=`application`) — הפנל החיל הלכה ידועה (של עליון/מנהלי) על עובדות הנידונות. תצטט את ניסוח הכלל **כפי שהוצג כאן** (לא בהכרח כפי שנקבע במקור) ואת התוצאה.
 - **עקרון פרשני שאומץ** (rule_type=`interpretive`) — איך הפנל פירש סעיף חוק / תכנית, באופן שניתן לאמץ.
 - **כלל פרוצדורלי** (rule_type=`procedural`) — קביעות בנושאי סמכות, מועדים, הליך.
- **מסקנה מנומקת ומשכנעת** (rule_type=`persuasive`) — מסקנה שלמה של הפנל בסוגיה, עם ההיגיון התומך, ניתנת לציטוט כאסמכתא משכנעת.
+- **מסקנה מהותית מנומקת** (rule_type=`holding`) — מסקנה עקרונית שלמה של הפנל בסוגיה, עם ההיגיון התומך, בת-הכללה ובת-הסתמכות.

 **אין לחלץ:**
 - ממצאים עובדתיים ספציפיים לתיק או יישום על נסיבות התיק ("העורר לא הוכיח X", "במקרה דנן", שמות צדדים, סכומים קונקרטיים) — חלץ את העיקרון/היישום בניסוח בר-הכללה בלבד.
@@ -175,7 +181,7 @@ HALACHA_EXTRACTION_PROMPT_PERSUASIVE = """אתה משפטן בכיר המתמח
 ## כללי איכות
 1. **נאמנות מוחלטת לציטוט** — supporting_quote חייב להיות הדבקה מדויקת מהקלט. אם אין ציטוט מתאים — אל תוסיף את ההלכה.
 2. **מספר הלכות** — החלטה ארוכה של ועדת ערר יכולה להניב 2-8 פריטים (יישומים + מסקנות). אל תמתח את הרשימה. אם אין מה לחלץ — החזר [].
-3. **rule_type מדויק** — application = יישום הלכה ידועה. interpretive = פרשנות. procedural = פרוצדורה. persuasive = מסקנה כללית בעלת ערך כאסמכתא.
+3. **rule_type מדויק (סוג הכלל בלבד)** — application = יישום הלכה ידועה. interpretive = פרשנות. procedural = פרוצדורה. holding = מסקנה מהותית עקרונית. **לא** binding/persuasive (סמכות נגזרת אוטומטית).
 4. **לא לפצל יתר על המידה — קריטי** — כל פריט = שאלה משפטית מובחנת אחת. פנים שונים של אותה שאלה = פריט אחד (בחר את הניסוח הכללי ביותר). אל תחזיר את אותו עיקרון בכמה ניסוחים.
 5. **שפה** — עברית משפטית מקצועית, גוף שלישי.
 6. **subject_tags** — 2-5 תגיות בעברית, snake_case.
@@ -184,10 +190,15 @@ HALACHA_EXTRACTION_PROMPT_PERSUASIVE = """אתה משפטן בכיר המתמח


 _VALID_PRACTICE_AREAS = {"rishuy_uvniya", "betterment_levy", "compensation_197"}
+# rule_type holds the rule ROLE only — what KIND of statement it is (INV-DM7).
+# The authority axis (binding/persuasive) is DERIVED from the source, never a
+# rule_type value: see halacha_quality.derive_authority.
 _VALID_RULE_TYPES = {
-    "binding", "interpretive", "procedural", "obiter",
-    "application", "persuasive",
+    "holding", "interpretive", "procedural", "application", "obiter",
 }
+# Legacy authority-as-role values → fold to the nearest genuine role. Kept so
+# old LLM outputs (and pre-split rows re-fed) coerce safely.
+_LEGACY_RULE_TYPE_FOLD = {"binding": "holding", "persuasive": "interpretive"}


 def _normalize_for_comparison(text: str) -> str:
@@ -227,13 +238,14 @@ def _verify_quote(supporting_quote: str, full_text: str) -> bool:
    return False


-def _coerce_halacha(raw: dict, is_binding: bool = True) -> dict | None:
+def _coerce_halacha(raw: dict) -> dict | None:
    """Validate and normalize one LLM-returned halacha dict.

-    Returns ``None`` if the entry is missing required fields. ``is_binding``
-    only affects the default rule_type when the LLM returned an unknown
-    value — for binding sources we default to ``binding``, otherwise to
-    ``persuasive`` (never pretend an appeals committee created halacha).
+    Returns ``None`` if the entry is missing required fields. ``rule_type`` is
+    the rule ROLE only (INV-DM7) — it is NEVER defaulted from the source's
+    bindingness (that was the source-conflation this split removed). Legacy
+    authority values fold to the nearest role; unknown defaults to
+    ``interpretive`` (the most common role).
    """
    if not isinstance(raw, dict):
        return None
@@ -242,13 +254,10 @@ def _coerce_halacha(raw: dict, is_binding: bool = True) -> dict | None:
    if not rule_statement or not supporting_quote:
        return None

-    default_rule_type = "binding" if is_binding else "persuasive"
-    rule_type = (raw.get("rule_type") or default_rule_type).strip().lower()
+    rule_type = (raw.get("rule_type") or "").strip().lower()
+    rule_type = _LEGACY_RULE_TYPE_FOLD.get(rule_type, rule_type)
    if rule_type not in _VALID_RULE_TYPES:
-        rule_type = default_rule_type
-    # Guard: don't let a non-binding source produce 'binding' rule_type
-    if not is_binding and rule_type == "binding":
-        rule_type = "persuasive"
+        rule_type = "interpretive"

    practice_areas_raw = raw.get("practice_areas") or []
    if isinstance(practice_areas_raw, str):
@@ -580,7 +589,7 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
            return
        cleaned: list[dict] = []
        for raw in items:
-            coerced = _coerce_halacha(raw, is_binding=is_binding)
+            coerced = _coerce_halacha(raw)
            if coerced is None:
                continue
            coerced["quote_verified"] = _verify_quote(
@@ -597,10 +606,10 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
            coerced["quality_flags"] = flags
            if halacha_quality.FLAG_NON_DECISION in flags and coerced["rule_type"] != "obiter":
                coerced["rule_type"] = "obiter"
-            # #81.4 — a binding-labeled rule that reads as a case-application is
+            # #81.4 — a holding-labeled rule that reads as a case-application is
            # re-typed application (it carries FLAG_APPLICATION either way).
            elif (halacha_quality.FLAG_APPLICATION in flags
-                  and coerced["rule_type"] == "binding"):
+                  and coerced["rule_type"] == "holding"):
                coerced["rule_type"] = "application"
            cleaned.append(coerced)
        # #81.3 NLI entailment — one batched judge call per chunk (fail-open).
--- a/mcp-server/src/legal_mcp/services/halacha_quality.py
+++ b/mcp-server/src/legal_mcp/services/halacha_quality.py
@@ -18,6 +18,37 @@ from __future__ import annotations

 import re

+# ── Authority axis — DERIVED from the source, never LLM-classified (INV-DM7) ──
+#
+# A halacha's *authority* (binding vs persuasive) is a property of WHERE it came
+# from, not of the rule's content. It is therefore derived deterministically
+# from ``case_law.precedent_level`` and never stored on ``halachot`` or guessed
+# by the extractor — keeping it orthogonal to ``rule_type`` (the rule ROLE).
+# Higher courts (עליון/מנהלי) bind the appeals committee; another committee is
+# only persuasive. See docs/spec/02-data-model.md INV-DM7.
+
+AUTHORITY_BINDING = "binding"
+AUTHORITY_PERSUASIVE = "persuasive"
+
+_BINDING_LEVELS = {"עליון", "מנהלי"}
+_PERSUASIVE_LEVELS = {"ועדת_ערר_מחוזית"}
+
+
+def derive_authority(precedent_level: str | None) -> str | None:
+    """Map a source's precedent_level to its authority over the committee.
+
+    Returns ``"binding"`` for higher courts (עליון/מנהלי), ``"persuasive"`` for
+    another appeals committee (ועדת_ערר_מחוזית), or ``None`` when the level is
+    unknown/empty (never guesses). Pure — the single source of truth for the
+    authority axis (INV-DM7).
+    """
+    level = (precedent_level or "").strip()
+    if level in _BINDING_LEVELS:
+        return AUTHORITY_BINDING
+    if level in _PERSUASIVE_LEVELS:
+        return AUTHORITY_PERSUASIVE
+    return None
+
 # ── Hebrew text normalization (shared with the extractor's quote check) ──

 _HEB_QUOTE_VARIANTS = "\"'׳״‘’“”«»„′″"
@@ -337,7 +368,7 @@ def compute_quality_flags(
    supporting_quote: str,
    reasoning_summary: str = "",
    quote_verified: bool = True,
-    rule_type: str = "binding",
+    rule_type: str = "interpretive",
 ) -> list[str]:
    """Return the list of quality flags for one halacha (empty == clean).