feat(halacha): NLI entailment validator via claude_session (#81.3) + task #86
#81.3 — a post-extraction validator that flags any halacha whose rule_statement is NOT entailed by its supporting_quote (the model over-reaching beyond its source).

- Engine: claude_session-as-judge (local CLI, zero API cost) per chaim's standing preference — one batched judge call per chunk, NOT a hosted NLI model.
- Pure, unit-tested helpers in halacha_quality: NLI_SYSTEM, build_nli_prompt, parse_nli_verdicts (fails OPEN — any shape/label ambiguity → 'entailed').
- halacha_extractor._nli_check wraps the call; fails OPEN on any error (e.g. no CLI in the container) so a flaky judge never blocks a genuine halacha.
- Non-entailed (neutral/contradiction) → quality_flag 'nli_unsupported' which blocks auto-approve (routes to pending_review) via the existing store gate.
- config: HALACHA_NLI_ENABLED/MODEL/EFFORT (effort 'low' — entailment is simple).

Verified: suite 166 passed (10 new); LIVE smoke test against the real claude CLI returned ['entailed','neutral'] for a supported vs unsupported rule.

Also commits TaskMaster #86 (Nevo preamble/ratio: anti-contamination strip fix + gold-set benchmark) capturing today's strip_nevo_preamble findings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -154,6 +154,15 @@ HALACHA_AUTO_APPROVE_THRESHOLD = float(
|
||||
# principle. Set > 1.0 to disable semantic dedup (exact-quote dedup still runs).
|
||||
HALACHA_DEDUP_COSINE = float(os.environ.get("HALACHA_DEDUP_COSINE", "0.93"))
|
||||
|
||||
# Halacha NLI entailment validator (#81.3) — after extraction, a claude_session
|
||||
# judge checks each halacha's rule_statement is entailed by its supporting_quote.
|
||||
# Non-entailed (neutral/contradiction) → quality flag 'nli_unsupported' that
|
||||
# blocks auto-approve. Runs through the local CLI (zero cost); fails OPEN if the
|
||||
# CLI is unavailable (e.g. container). 'low' effort — entailment is a simple call.
|
||||
HALACHA_NLI_ENABLED = os.environ.get("HALACHA_NLI_ENABLED", "true").lower() == "true"
|
||||
HALACHA_NLI_MODEL = os.environ.get("HALACHA_NLI_MODEL", HALACHA_EXTRACT_MODEL)
|
||||
HALACHA_NLI_EFFORT = os.environ.get("HALACHA_NLI_EFFORT", "low")
|
||||
|
||||
# Google Cloud Vision (OCR for scanned PDFs)
|
||||
GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")
|
||||
|
||||
|
||||
@@ -284,6 +284,27 @@ def _coerce_halacha(raw: dict, is_binding: bool = True) -> dict | None:
|
||||
}
|
||||
|
||||
|
||||
async def _nli_check(items: list[dict]) -> list[str]:
    """Return one entailment label per halacha, judged via claude_session (#81.3).

    Each item's rule_statement is checked for being entailed by its
    supporting_quote in a single batched ``claude_session.query_json`` call.

    Args:
        items: Halacha dicts; they are passed as-is to
            ``halacha_quality.build_nli_prompt``.

    Returns:
        A list of labels as produced by ``halacha_quality.parse_nli_verdicts``
        for ``len(items)`` entries, or ``[]`` when ``items`` is empty.

    FAILS OPEN: if building the prompt or querying the judge raises any
    ``Exception``, a warning is logged and every item is reported as
    'entailed', so an unavailable judge never flags a halacha.
    """
    # Nothing to judge — skip the judge call entirely.
    if not items:
        return []

    try:
        # Prompt construction stays inside the try so that it is covered by
        # the same fail-open handling as the judge call itself.
        prompt = halacha_quality.build_nli_prompt(items)
        judged = await claude_session.query_json(
            prompt,
            system=halacha_quality.NLI_SYSTEM,
            model=config.HALACHA_NLI_MODEL or None,
            effort=config.HALACHA_NLI_EFFORT or None,
        )
    except Exception as exc:
        logger.warning("halacha NLI check failed (fail-open, no flags): %s", exc)
        return ["entailed"] * len(items)
    else:
        # The parser coerces whatever the judge returned into len(items) labels.
        return halacha_quality.parse_nli_verdicts(judged, len(items))
|
||||
|
||||
|
||||
async def _extract_chunk(
|
||||
chunk_text: str,
|
||||
section_type: str,
|
||||
@@ -511,6 +532,12 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
|
||||
if halacha_quality.FLAG_NON_DECISION in flags and coerced["rule_type"] != "obiter":
|
||||
coerced["rule_type"] = "obiter"
|
||||
cleaned.append(coerced)
|
||||
# #81.3 NLI entailment — one batched judge call per chunk (fail-open).
|
||||
if config.HALACHA_NLI_ENABLED and cleaned:
|
||||
verdicts = await _nli_check(cleaned)
|
||||
for h, v in zip(cleaned, verdicts):
|
||||
if v != "entailed" and halacha_quality.FLAG_NLI_UNSUPPORTED not in h["quality_flags"]:
|
||||
h["quality_flags"].append(halacha_quality.FLAG_NLI_UNSUPPORTED)
|
||||
if cleaned:
|
||||
embed_inputs = [
|
||||
f"{h['rule_statement']} — {h['reasoning_summary']}".strip(" —")
|
||||
|
||||
@@ -134,6 +134,55 @@ FLAG_NON_DECISION = "non_decision"
|
||||
FLAG_TRUNCATED_QUOTE = "truncated_quote"
|
||||
FLAG_THIN_RESTATEMENT = "thin_restatement"
|
||||
FLAG_QUOTE_UNVERIFIED = "quote_unverified"
|
||||
FLAG_NLI_UNSUPPORTED = "nli_unsupported" # rule not entailed by its quote (#81.3)
|
||||
|
||||
|
||||
# ── NLI entailment check (supporting_quote ⊨ rule_statement) — #81.3 ──
|
||||
#
|
||||
# Pure prompt-builder + verdict-parser; the LLM call itself runs through
|
||||
# claude_session in halacha_extractor (local CLI, zero cost). A rule that the
|
||||
# quote does not actually support (neutral) or contradicts is the model
|
||||
# over-reaching beyond its source — flag it (blocks auto-approve). EVERYTHING
|
||||
# here fails OPEN: any parse ambiguity resolves to "entailed" so a flaky judge
|
||||
# never blocks a genuine halacha.
|
||||
|
||||
NLI_SYSTEM = (
|
||||
"אתה בודק היסק (entailment) משפטי. לכל זוג {כלל, ציטוט} החלט האם **הכלל נובע מהציטוט** — "
|
||||
"כלומר הציטוט תומך בכלל ואינו מרחיב מעבר למה שנכתב בו. שלוש תוויות בלבד:\n"
|
||||
"- entailed = הכלל נתמך במלואו בציטוט.\n"
|
||||
"- neutral = הציטוט אינו תומך בכלל (הכלל מרחיב/מוסיף מעבר לציטוט).\n"
|
||||
"- contradiction = הכלל סותר את הציטוט.\n"
|
||||
'החזר JSON array בלבד באורך מספר הזוגות, לדוגמה: ["entailed","neutral",...]. '
|
||||
"ללא markdown, ללא הסבר."
|
||||
)
|
||||
|
||||
_NLI_LABELS = {"entailed", "neutral", "contradiction"}
|
||||
|
||||
|
||||
def build_nli_prompt(items: list[dict]) -> str:
    """Render the judge's user message from a list of halacha dicts.

    Every item becomes one block, numbered from 1, holding its
    ``rule_statement`` and ``supporting_quote`` (missing or falsy values are
    rendered as empty strings; surrounding whitespace is stripped). Blocks are
    separated by a blank line. An empty ``items`` list yields ``""``.
    """

    def _clean(value) -> str:
        # Falsy values (None, "") collapse to an empty string before stripping.
        return (value or "").strip()

    def _render(number: int, entry: dict) -> str:
        rule = _clean(entry.get("rule_statement"))
        quote = _clean(entry.get("supporting_quote"))
        return f"### זוג {number}\nכלל: {rule}\nציטוט: {quote}"

    return "\n\n".join(
        _render(number, entry) for number, entry in enumerate(items, 1)
    )
|
||||
|
||||
|
||||
def parse_nli_verdicts(raw, n: int) -> list[str]:
    """Normalize the judge's reply into exactly ``n`` labels, failing open.

    Args:
        raw: The judge's parsed output. Expected to be a list of ``n``
            entries, each either a label or a dict carrying the label under
            the ``"verdict"`` key.
        n: Number of labels the caller needs.

    Returns:
        A list of ``n`` labels. If ``raw`` is not a list of length ``n``,
        every label is 'entailed'. Otherwise each entry is stringified,
        stripped and lower-cased; anything not in ``_NLI_LABELS`` becomes
        'entailed'.
    """
    shape_ok = isinstance(raw, list) and len(raw) == n
    if not shape_ok:
        # Wrong type or wrong count: the labels cannot be paired with items.
        return ["entailed"] * n

    def _label(entry) -> str:
        candidate = entry.get("verdict") if isinstance(entry, dict) else entry
        normalized = str(candidate or "").strip().lower()
        # Unknown or empty labels resolve to the non-blocking verdict.
        return normalized if normalized in _NLI_LABELS else "entailed"

    return [_label(entry) for entry in raw]
|
||||
|
||||
|
||||
def compute_quality_flags(
|
||||
|
||||
Reference in New Issue
Block a user