2026-06-03 16:27:13 +00:00
4 changed files with 176 additions and 3 deletions
--- a/mcp-server/src/legal_mcp/config.py
+++ b/mcp-server/src/legal_mcp/config.py
@@ -163,6 +163,15 @@ HALACHA_NLI_ENABLED = os.environ.get("HALACHA_NLI_ENABLED", "true").lower() == "
 HALACHA_NLI_MODEL = os.environ.get("HALACHA_NLI_MODEL", HALACHA_EXTRACT_MODEL)
 HALACHA_NLI_EFFORT = os.environ.get("HALACHA_NLI_EFFORT", "low")

+# Halacha over-extraction consolidation (#81.5). Once all chunks of a precedent
+# are extracted, one claude_session pass (local CLI, zero cost) groups rows that
+# restate the SAME legal question but sit below the #82 dedup cosine; the best
+# row per group stays, the rest are marked rejected (reversible). Fails OPEN.
+# Enabled only by the literal "true" (any case); 'high' effort: folding is subtle.
+HALACHA_CONSOLIDATE_ENABLED = os.environ.get("HALACHA_CONSOLIDATE_ENABLED", "true").lower() == "true"
+HALACHA_CONSOLIDATE_MODEL = os.environ.get("HALACHA_CONSOLIDATE_MODEL", HALACHA_EXTRACT_MODEL)
+HALACHA_CONSOLIDATE_EFFORT = os.environ.get("HALACHA_CONSOLIDATE_EFFORT", "high")
+
 # Google Cloud Vision (OCR for scanned PDFs)
 GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")

--- a/mcp-server/src/legal_mcp/services/halacha_extractor.py
+++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py
@@ -305,6 +305,71 @@ async def _nli_check(items: list[dict]) -> list[str]:
    return halacha_quality.parse_nli_verdicts(raw, len(items))


+def _consolidation_priority(r: dict):
+    """Sort key picking a fold group's survivor: the smallest key is KEPT.
+
+    Order: reviewed status, higher confidence, verified quote, longer rule, id.
+    """
+    ranks = {"approved": 0, "published": 0, "pending_review": 1}
+    tier = ranks.get(r.get("review_status"), 2)  # any other status sorts last
+    score = float(r.get("confidence") or 0.0)
+    unverified = 0 if r.get("quote_verified") else 1
+    rule_len = len(r.get("rule_statement") or "")
+    return (tier, -score, unverified, -rule_len, str(r["id"]))
+
+
+async def _consolidate_precedent(case_law_id: UUID) -> int:
+    """#81.5 — collapse rows restating one legal question onto a single keeper.
+
+    One claude_session call per precedent proposes fold groups over the rows
+    that are not already ``rejected``. Per group, the row ranked first by
+    ``_consolidation_priority`` stays; the others are set to ``rejected``.
+    Returns the value of ``db.update_halachot_batch``, or 0 when disabled, when
+    nothing folds, or when ANY exception occurs (fail-open, logged as warning).
+    """
+    if not config.HALACHA_CONSOLIDATE_ENABLED:
+        return 0
+    try:
+        fetched = await db.list_halachot(case_law_id=case_law_id, limit=10_000)
+        active = [h for h in fetched if h.get("review_status") != "rejected"]
+        if len(active) < 2:
+            return 0
+        index_to_row = {h["halacha_index"]: h for h in active}
+        verdict = await claude_session.query_json(
+            halacha_quality.build_consolidation_prompt(active),
+            system=halacha_quality.CONSOLIDATE_SYSTEM,
+            model=config.HALACHA_CONSOLIDATE_MODEL or None,
+            effort=config.HALACHA_CONSOLIDATE_EFFORT or None,
+        )
+        fold_groups = halacha_quality.parse_fold_groups(verdict)
+        if not fold_groups:
+            return 0
+        keep: set[str] = set()
+        drop: set[str] = set()
+        for group in fold_groups:
+            # Indices that match no active row are silently ignored.
+            ranked = [index_to_row[i] for i in group if i in index_to_row]
+            if len(ranked) < 2:
+                continue
+            ranked.sort(key=_consolidation_priority)
+            keeper, *folded = ranked
+            keep.add(str(keeper["id"]))
+            drop.update(str(h["id"]) for h in folded)
+        # A keeper of one group must survive even if it lost in another group.
+        loser_ids = [row_id for row_id in drop if row_id not in keep]
+        if not loser_ids:
+            return 0
+        return await db.update_halachot_batch(
+            loser_ids, "rejected", reviewer="auto-consolidated (#81.5 facet-fold)",
+        )
+    except Exception as e:
+        logger.warning(
+            "halacha consolidation failed for %s (fail-open, no folds): %s",
+            case_law_id, e,
+        )
+        return 0
+
+
 async def _extract_chunk(
    chunk_text: str,
    section_type: str,
@@ -585,7 +650,10 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
        return {"status": "partial", "extracted": total, "stored": stored_total,
                "pending_chunks": still_pending, "total_chunks": len(chunks)}

-    # All chunks done.
+    # All chunks done. #81.5: fold cross-chunk facets of one legal question
+    # (the prompt dedups within a chunk; this catches across chunks).
+    folded = await _consolidate_precedent(case_law_id)
+
    stored = total
    verified = sum(1 for h in await db.list_halachot(case_law_id=case_law_id, limit=10_000)
                   if h.get("quote_verified"))
@@ -593,13 +661,14 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,

    logger.info(
        "halacha_extractor: case_law=%s completed — %d halachot stored "
-        "(%d new this run), %d quote-verified, %d chunks",
-        case_law_id, total, stored_total, verified, len(chunks),
+        "(%d new this run), %d quote-verified, %d folded, %d chunks",
+        case_law_id, total, stored_total, verified, folded, len(chunks),
    )
    return {
        "status": "completed",
        "extracted": total,
        "verified": verified,
+        "folded": folded,
        "stored": stored,
        "stored_this_run": stored_total,
        "total_chunks": len(chunks),
--- a/mcp-server/src/legal_mcp/services/halacha_quality.py
+++ b/mcp-server/src/legal_mcp/services/halacha_quality.py
@@ -185,6 +185,66 @@ def parse_nli_verdicts(raw, n: int) -> list[str]:
    return out


+# ── Over-extraction consolidation (fold facets of one legal question) — #81.5 ──
+# #82 dedup-on-insert only removes near-EXACT duplicates (cosine ≥ 0.93). #81.5
+# targets what is left: rows answering the SAME legal question in different
+# wording, which score BELOW that threshold (the שפר 14-vs-4 / 403-17→89
+# granularity gap). A per-precedent claude_session pass groups them; the extractor
+# keeps one canonical row per group and marks the rest rejected (reversible, out
+# of the active corpus + review queue). FOLD-ONLY: never merges distinct legal
+# questions, never invents. Fails OPEN (parse error → no folds).
+
+# System prompt (Hebrew): answer with a JSON array of index groups, [] if none.
+CONSOLIDATE_SYSTEM = (
+    "אתה מאחד פנים-כפולים של הלכות שחולצו מאותו פסק דין. בהינתן רשימה ממוספרת של הלכות, "
+    "זהה קבוצות של הלכות שהן **אותה שאלה משפטית** בניסוחים או פנים שונים. "
+    "כללים: (1) אַחֵד רק הלכות שעונות על אותה שאלה משפטית בדיוק; (2) **אל תאַחֵד** הלכות "
+    "שעונות על שאלות משפטיות שונות (גם אם קרובות בנושא); (3) הלכה ייחודית — אל תכלול בשום קבוצה. "
+    'החזר JSON array של קבוצות, כל קבוצה = array של מספרי-האינדקס שיש לאַחֵד (לפחות 2 חברים). '
+    "לדוגמה: [[2,5,9],[14,18]]. אם אין מה לאַחֵד החזר []. ללא markdown, ללא הסבר."
+)
+
+
+def build_consolidation_prompt(items: list[dict]) -> str:
+    """Render a precedent's halachot, one ``[index] rule`` line per row."""
+
+    def render(h: dict) -> str:
+        # Missing/None text fields count as empty; both are whitespace-stripped.
+        rule = (h.get("rule_statement") or "").strip()
+        reason = (h.get("reasoning_summary") or "").strip()
+        head = f"[{h.get('halacha_index')}] {rule}"
+        # The reasoning suffix is appended only when there is reasoning text.
+        return f"{head}  (היגיון: {reason})" if reason else head
+
+    return "\n".join(render(h) for h in items)
+
+
+def parse_fold_groups(raw) -> list[list[int]]:
+    """Coerce judge output into a list of fold-groups (≥2 int indices each).
+
+    Fails SAFE: non-list input → []; non-list groups, bools, fractional floats
+    (int() would truncate 2.7 → 2), unparsable values and <2-member groups drop.
+    """
+    if not isinstance(raw, list):
+        return []
+    groups: list[list[int]] = []
+    for g in raw:
+        if not isinstance(g, list):
+            continue
+        members: list[int] = []
+        for x in g:
+            if isinstance(x, bool) or (isinstance(x, float) and not x.is_integer()):
+                continue  # True / 2.7 are not row indices; never guess one
+            try:
+                members.append(int(x))
+            except (TypeError, ValueError):
+                continue
+        members = list(dict.fromkeys(members))  # dedup, keep first-seen order
+        if len(members) >= 2:
+            groups.append(members)
+    return groups
+
+
 def compute_quality_flags(
    rule_statement: str,
    supporting_quote: str,
--- a/mcp-server/tests/test_halacha_quality.py
+++ b/mcp-server/tests/test_halacha_quality.py
@@ -146,3 +146,38 @@ def test_nli_check_empty():
    import asyncio
    from legal_mcp.services import halacha_extractor as he
    assert asyncio.run(he._nli_check([])) == []
+
+
+# ── #81.5 consolidation — pure prompt + fold-group parser ──
+
+def test_build_consolidation_prompt():
+    """Pin the exact rendering, including NO reasoning suffix when it is empty."""
+    items = [
+        {"halacha_index": 3, "rule_statement": "כלל גימל", "reasoning_summary": "כי"},
+        {"halacha_index": 7, "rule_statement": "כלל זין", "reasoning_summary": ""},
+    ]
+    assert hq.build_consolidation_prompt(items) == "[3] כלל גימל  (היגיון: כי)\n[7] כלל זין"
+
+
+@pytest.mark.parametrize("raw,expected", [
+    pytest.param([[2, 5, 9], [14, 18]], [[2, 5, 9], [14, 18]], id="valid-groups-kept"),
+    pytest.param([[2, 5], [7]], [[2, 5]], id="singleton-group-dropped"),
+    pytest.param([["2", "5"]], [[2, 5]], id="string-ints-coerced"),
+    pytest.param([[2, 2, 5]], [[2, 5]], id="dedup-within-group"),
+    pytest.param([], [], id="nothing-to-fold"),
+    pytest.param("garbage", [], id="non-list-is-safe"),
+    pytest.param(None, [], id="none-is-safe"),
+    pytest.param([[1, "x"], [3, 4]], [[3, 4]], id="group-below-two-valid-dropped"),
+])
+def test_parse_fold_groups(raw, expected):
+    assert hq.parse_fold_groups(raw) == expected
+
+
+def test_consolidation_priority_prefers_approved_then_confidence():
+    """Review status outranks confidence: approved@0.7 beats pending@0.95."""
+    from legal_mcp.services import halacha_extractor as he
+    shared = {"quote_verified": True, "rule_statement": "x"}
+    approved = {"id": "a", "review_status": "approved", "confidence": 0.7, **shared}
+    pending_hi = {"id": "b", "review_status": "pending_review", "confidence": 0.95, **shared}
+    keeper = min([approved, pending_hi], key=he._consolidation_priority)
+    assert keeper["id"] == "a"