feat(halacha): NLI entailment validator via claude_session (#81.3) + task #86

#81.3 — a post-extraction validator that flags halachot whose rule_statement is NOT entailed by their supporting_quote (the model over-reaching beyond its source).

- Engine: claude_session-as-judge (local CLI, zero API cost) per chaim's standing preference — one batched judge call per chunk, NOT a hosted NLI model.
- Pure, unit-tested helpers in halacha_quality: NLI_SYSTEM, build_nli_prompt, parse_nli_verdicts (fails OPEN — any shape/label ambiguity → 'entailed').
- halacha_extractor._nli_check wraps the call; fails OPEN on any error (e.g. no CLI in the container) so a flaky judge never blocks a genuine halacha.
- Non-entailed (neutral/contradiction) → quality_flag 'nli_unsupported', which blocks auto-approve (routes to pending_review) via the existing store gate.
- config: HALACHA_NLI_ENABLED/MODEL/EFFORT (effort 'low' — entailment is simple).

Verified: suite 166 passed (10 new); LIVE smoke test against the real claude CLI returned ['entailed','neutral'] for a supported vs unsupported rule.

Also commits TaskMaster #86 (Nevo preamble/ratio: anti-contamination strip fix + gold-set benchmark) capturing today's strip_nevo_preamble findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:46:12 +00:00
parent e25507f9ad
commit f196bed564
5 changed files with 226 additions and 28 deletions
--- a/mcp-server/src/legal_mcp/services/halacha_extractor.py
+++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py
@@ -284,6 +284,27 @@ def _coerce_halacha(raw: dict, is_binding: bool = True) -> dict | None:
    }


+async def _nli_check(items: list[dict]) -> list[str]:
+    """Entailment verdict per item (quote ⊨ rule) via claude_session — #81.3.
+
+    Local CLI, zero cost. FAILS OPEN: any error returns all-'entailed' so a
+    flaky/unavailable judge (e.g. in the container) never blocks a halacha.
+    """
+    if not items:  # nothing to judge — return early without calling the judge
+        return []
+    try:
+        raw = await claude_session.query_json(
+            halacha_quality.build_nli_prompt(items),  # one prompt covering all items
+            system=halacha_quality.NLI_SYSTEM,
+            model=config.HALACHA_NLI_MODEL or None,  # falsy config value is passed as None
+            effort=config.HALACHA_NLI_EFFORT or None,  # falsy config value is passed as None
+        )
+    except Exception as e:  # deliberately broad: fail-open on any judge error
+        logger.warning("halacha NLI check failed (fail-open, no flags): %s", e)
+        return ["entailed"] * len(items)  # one 'entailed' per input item
+    return halacha_quality.parse_nli_verdicts(raw, len(items))  # parser is given the expected verdict count
+
+
 async def _extract_chunk(
    chunk_text: str,
    section_type: str,
@@ -511,6 +532,12 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
            if halacha_quality.FLAG_NON_DECISION in flags and coerced["rule_type"] != "obiter":
                coerced["rule_type"] = "obiter"
            cleaned.append(coerced)
+        # #81.3 NLI entailment — one batched judge call per chunk (fail-open).
+        if config.HALACHA_NLI_ENABLED and cleaned:
+            verdicts = await _nli_check(cleaned)
+            for h, v in zip(cleaned, verdicts):
+                if v != "entailed" and halacha_quality.FLAG_NLI_UNSUPPORTED not in h["quality_flags"]:
+                    h["quality_flags"].append(halacha_quality.FLAG_NLI_UNSUPPORTED)
        if cleaned:
            embed_inputs = [
                f"{h['rule_statement']} — {h['reasoning_summary']}".strip(" —")