feat(halacha): over-extraction consolidation — fold facets via claude_session (#81.5)

After a precedent finishes extracting, a claude_session pass folds facets of the SAME legal question (those that fall below #82's dedup cosine threshold — the שפר 14-vs-4 / 403-17→89 granularity gap) into one canonical; the rest are marked 'rejected' (reversible: out of the active corpus AND the review queue, but recoverable). FOLD-ONLY — never merges distinct legal questions, never invents.

- Engine: claude_session-as-judge (local CLI, zero cost), 'high' effort — folding needs careful judgment. One pass per precedent, runs in _extract_impl once all chunks are done (the prompt dedups within a chunk; this catches across chunks).
- Pure, unit-tested helpers in halacha_quality: CONSOLIDATE_SYSTEM, build_consolidation_prompt, parse_fold_groups (fails SAFE → [] on any malformed shape; drops <2-member groups; coerces/dedups indices).
- halacha_extractor._consolidate_precedent picks the canonical per group (approved>pending, higher confidence, quote_verified, longer) and rejects the rest via the existing update_halachot_batch (#84). Never rejects a canonical. Fails OPEN on any error (no CLI / parse fail → 0 folds, data untouched).
- config: HALACHA_CONSOLIDATE_ENABLED/MODEL/EFFORT.

Verified: suite 176 passed (10 new); integration vs dev DB — a 2-facet group folds to 1 canonical + 1 rejected (tagged), distinct rules untouched, claude error → 0 folds (fail-open).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 16:26:44 +00:00
parent 5efb8cf915
commit fb60dca796
4 changed files with 176 additions and 3 deletions
--- a/mcp-server/src/legal_mcp/services/halacha_extractor.py
+++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py
@@ -305,6 +305,71 @@ async def _nli_check(items: list[dict]) -> list[str]:
    return halacha_quality.parse_nli_verdicts(raw, len(items))


+def _consolidation_priority(r: dict):
+    """Canonical = the row to KEEP within a fold group (lower sorts first)."""
+    status_rank = {"approved": 0, "published": 0, "pending_review": 1}.get(
+        r.get("review_status"), 2)  # any other (or missing) status ranks last
+    return (
+        status_rank,  # approved/published before pending_review before the rest
+        -float(r.get("confidence") or 0.0),  # higher confidence first; missing counts as 0.0
+        0 if r.get("quote_verified") else 1,  # quote-verified rows first
+        -len(r.get("rule_statement") or ""),  # longer rule statement first
+        str(r["id"]),  # final tie-break: compare the id strings
+    )
+
+
+async def _consolidate_precedent(case_law_id: UUID) -> int:
+    """#81.5 — fold facets of the SAME legal question into one canonical.
+
+    Per-precedent claude_session pass (local CLI, zero cost). Keeps the best row
+    of each fold group; marks the rest ``rejected`` (reversible — out of the
+    active corpus AND the review queue, but recoverable). FOLD-ONLY. Fails OPEN:
+    any error / parse failure → 0 folds (never touches data on doubt).
+    """
+    if not config.HALACHA_CONSOLIDATE_ENABLED:  # feature flag off → no-op
+        return 0
+    try:
+        rows = [
+            r for r in await db.list_halachot(case_law_id=case_law_id, limit=10_000)
+            if r.get("review_status") != "rejected"  # already-rejected rows are left out
+        ]
+        if len(rows) < 2:  # nothing to fold with fewer than two rows
+            return 0
+        by_idx = {r["halacha_index"]: r for r in rows}  # a repeated index keeps only the last row
+        raw = await claude_session.query_json(
+            halacha_quality.build_consolidation_prompt(rows),
+            system=halacha_quality.CONSOLIDATE_SYSTEM,
+            model=config.HALACHA_CONSOLIDATE_MODEL or None,  # falsy config value → None
+            effort=config.HALACHA_CONSOLIDATE_EFFORT or None,  # falsy config value → None
+        )
+        groups = halacha_quality.parse_fold_groups(raw)  # each group is iterated below as halacha_index values
+        if not groups:
+            return 0
+        canonicals: set[str] = set()
+        losers: set[str] = set()
+        for g in groups:
+            members = [by_idx[i] for i in g if i in by_idx]  # indices with no matching row are ignored
+            if len(members) < 2:  # a group needs at least two known rows to fold
+                continue
+            members.sort(key=_consolidation_priority)  # best row first
+            canonicals.add(str(members[0]["id"]))
+            for m in members[1:]:
+                losers.add(str(m["id"]))
+        # A row that lost in one group but is the canonical of another is kept, not rejected.
+        loser_ids = [i for i in losers if i not in canonicals]
+        if not loser_ids:
+            return 0
+        return await db.update_halachot_batch(  # the batch call's result is returned as-is
+            loser_ids, "rejected", reviewer="auto-consolidated (#81.5 facet-fold)",
+        )
+    except Exception as e:  # fail-open boundary: log the error and report 0 folds
+        logger.warning(
+            "halacha consolidation failed for %s (fail-open, no folds): %s",
+            case_law_id, e,
+        )
+        return 0
+
+
 async def _extract_chunk(
    chunk_text: str,
    section_type: str,
@@ -585,7 +650,10 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
        return {"status": "partial", "extracted": total, "stored": stored_total,
                "pending_chunks": still_pending, "total_chunks": len(chunks)}

-    # All chunks done.
+    # All chunks done. #81.5: fold cross-chunk facets of one legal question
+    # (the prompt dedups within a chunk; this catches across chunks).
+    folded = await _consolidate_precedent(case_law_id)
+
    stored = total
    verified = sum(1 for h in await db.list_halachot(case_law_id=case_law_id, limit=10_000)
                   if h.get("quote_verified"))
@@ -593,13 +661,14 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,

    logger.info(
        "halacha_extractor: case_law=%s completed — %d halachot stored "
-        "(%d new this run), %d quote-verified, %d chunks",
-        case_law_id, total, stored_total, verified, len(chunks),
+        "(%d new this run), %d quote-verified, %d folded, %d chunks",
+        case_law_id, total, stored_total, verified, folded, len(chunks),
    )
    return {
        "status": "completed",
        "extracted": total,
        "verified": verified,
+        "folded": folded,
        "stored": stored,
        "stored_this_run": stored_total,
        "total_chunks": len(chunks),