After a precedent finishes extracting, a claude_session pass folds facets of the SAME legal question (below #82's dedup cosine — the שפר 14-vs-4 / 403-17→89 granularity gap) into one canonical; the rest are marked 'rejected' (reversible: out of the active corpus AND the review queue, but recoverable). FOLD-ONLY — never merges distinct legal questions, never invents.

- Engine: claude_session-as-judge (local CLI, zero cost), 'high' effort — folding needs careful judgment. One pass per precedent, run in _extract_impl once all chunks are done (the prompt dedups within a chunk; this pass catches duplicates across chunks).
- Pure, unit-tested helpers in halacha_quality: CONSOLIDATE_SYSTEM, build_consolidation_prompt, parse_fold_groups (fails SAFE → [] on any malformed shape; drops <2-member groups; coerces/dedups indices).
- halacha_extractor._consolidate_precedent picks the canonical per group (approved>pending, higher confidence, quote_verified, longer) and rejects the rest via the existing update_halachot_batch (#84). Never rejects a canonical. Fails OPEN on any error (no CLI / parse fail → 0 folds, data untouched).
- config: HALACHA_CONSOLIDATE_ENABLED/MODEL/EFFORT.

Verified: suite 176 passed (10 new); integration vs dev DB — a 2-facet group folds to 1 canonical + 1 rejected (tagged), distinct rules untouched, claude error → 0 folds (fail-open).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
184 lines
7.4 KiB
Python
184 lines
7.4 KiB
Python
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from legal_mcp.services import halacha_quality as hq
|
|
|
|
|
|
# ── non-decision / obiter ──
|
|
|
|
# Phrases that should be recognised as a non-decision / obiter marker.
_NON_DECISION_SAMPLES = [
    "איני רואה לקבוע מסמרות בשאלה זו",
    "אין צורך להכריע בטענה זו",
    "למעלה מן הצורך נעיר כי",
    "הערה זו ניתנת אגב אורחא",
]


@pytest.mark.parametrize("text", _NON_DECISION_SAMPLES)
def test_detect_non_decision_hits(text):
    """detect_non_decision returns a non-None result for each marker phrase."""
    detected = hq.detect_non_decision(text)
    assert detected is not None
|
|
|
|
|
|
# Texts (including the empty string) that must not be reported as a non-decision.
_DECISIVE_SAMPLES = [
    "בית המשפט קבע כי ההיתר בטל",
    "ועדת הערר מוסמכת לדון בטענת סטייה מתכנית",
    "",
]


@pytest.mark.parametrize("text", _DECISIVE_SAMPLES)
def test_detect_non_decision_misses(text):
    """detect_non_decision returns None when no marker phrase is present."""
    detected = hq.detect_non_decision(text)
    assert detected is None
|
|
|
|
|
|
def test_non_decision_scans_all_fields():
    """A marker found only in the third argument is still detected."""
    # The marker sits in the quote, not in the abstracted rule.
    detected = hq.detect_non_decision("כלל כללי", "", "וכאן אין צורך להכריע")
    assert detected is not None
|
|
|
|
|
|
# ── truncated quote ──
|
|
|
|
def test_truncated_dangling_letter():
    """A quote ending in a lone dangling letter is reported as truncated."""
    dangling_quote = "ראוי כי תהיה השפעה על ה"
    assert hq.is_quote_truncated(dangling_quote) is True
|
|
|
|
|
|
def test_truncated_empty():
    """A whitespace-only quote is reported as truncated."""
    blank_quote = " "
    assert hq.is_quote_truncated(blank_quote) is True
|
|
|
|
|
|
# Complete clauses that must not be flagged as truncated.
_COMPLETE_QUOTES = [
    "ועדת הערר היא הגוף המקצועי האמון על בחינת ההיבטים התכנוניים.",
    "אין לועדה סמכות לסטות מתקנות התכנון והבניה",  # no period, but full word
    "ההיתר תואם את התכנית החלה על האיזור",
]


@pytest.mark.parametrize("quote", _COMPLETE_QUOTES)
def test_not_truncated_complete_clauses(quote):
    """is_quote_truncated returns False for each complete clause."""
    truncated = hq.is_quote_truncated(quote)
    assert truncated is False
|
|
|
|
|
|
# ── thin restatement ──
|
|
|
|
def test_thin_restatement_near_copy():
    """A rule whose text equals its supporting quote is a thin restatement."""
    supporting_quote = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    rule_text = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    is_thin = hq.is_thin_restatement(rule_text, supporting_quote)
    assert is_thin is True
|
|
|
|
|
|
def test_not_thin_when_abstracted():
    """A rule that abstracts well beyond its quote is not a thin restatement."""
    supporting_quote = "אין חולק כי אין לועדה סמכות לסטות מתקנות"
    rule_text = (
        "ועדה מקומית לתכנון ובניה אינה מוסמכת לסטות מהוראות תקנות התכנון "
        "והבניה, ובכלל זה מהוראות התוספת השנייה, ואין בידה ליתן היתר הסוטה מהן."
    )
    is_thin = hq.is_thin_restatement(rule_text, supporting_quote)
    assert is_thin is False
|
|
|
|
|
|
def test_thin_handles_empty():
    """An empty rule is never classified as a thin restatement."""
    is_thin = hq.is_thin_restatement("", "something")
    assert is_thin is False
|
|
|
|
|
|
# ── aggregate flags + auto-approve gate semantics ──
|
|
|
|
def test_clean_halacha_no_flags():
    """A well-formed rule with a verified quote produces an empty flag list."""
    rule_text = (
        "ועדת הערר מוסמכת לדון בערר על החלטה ליתן היתר בנייה גם כאשר נטען "
        "כי ההיתר סוטה מתכנית, בהתאם למגמת תיקון 43 לחוק."
    )
    supporting_quote = (
        "פרשנות מרחיבה המאפשרת הגשת ערר גם במקרה של מתן היתר כאשר נטען כי "
        "ההיתר סוטה מתכנית הולמת את מגמת המחוקק בתיקון 43."
    )
    flags = hq.compute_quality_flags(
        rule_text, supporting_quote, "", quote_verified=True
    )
    assert flags == []
|
|
|
|
|
|
def test_flags_accumulate():
    """Several independent quality problems are all reported together."""
    # The same text serves as rule and quote: it carries an obiter marker and
    # ends in a dangling letter, and the quote is declared unverified.
    flags = hq.compute_quality_flags(
        "כלל אגב אורחא על ה",
        "כלל אגב אורחא על ה",
        quote_verified=False,
    )

    assert hq.FLAG_NON_DECISION in flags
    assert hq.FLAG_TRUNCATED_QUOTE in flags
    assert hq.FLAG_QUOTE_UNVERIFIED in flags
|
|
|
|
|
|
def test_normalize_text_quote_variants():
    """ASCII double quote and Hebrew gershayim normalize to the same text."""
    with_ascii_quote = hq.normalize_text('עע"מ 317/10')
    with_gershayim = hq.normalize_text("עע״מ 317/10")
    assert with_ascii_quote == with_gershayim
|
|
|
|
|
|
# ── #81.3 NLI entailment — pure prompt + parser ──
|
|
|
|
def test_build_nli_prompt_contains_pairs():
    """The NLI prompt embeds the rule/quote texts and numbers each pair."""
    pairs = [
        {"rule_statement": "כלל אלף", "supporting_quote": "ציטוט אלף"},
        {"rule_statement": "כלל בית", "supporting_quote": "ציטוט בית"},
    ]

    prompt = hq.build_nli_prompt(pairs)

    # Content from both the first and the last pair is present.
    assert "כלל אלף" in prompt
    assert "ציטוט בית" in prompt
    # Pairs are labelled with 1-based numbers.
    assert "זוג 1" in prompt
    assert "זוג 2" in prompt
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("raw", "n", "expected"),
    [
        (["entailed", "neutral"], 2, ["entailed", "neutral"]),
        # case-insensitive
        (["ENTAILED", "Contradiction"], 2, ["entailed", "contradiction"]),
        # dict shape
        (
            [{"verdict": "neutral"}, {"verdict": "entailed"}],
            2,
            ["neutral", "entailed"],
        ),
        # length mismatch -> fail-open
        (["entailed"], 2, ["entailed", "entailed"]),
        # non-list -> fail-open
        (None, 2, ["entailed", "entailed"]),
        # unknown label -> entailed
        (["bananas", "neutral"], 2, ["entailed", "neutral"]),
    ],
)
def test_parse_nli_verdicts(raw, n, expected):
    """parse_nli_verdicts normalizes model output to exactly n verdict labels."""
    verdicts = hq.parse_nli_verdicts(raw, n)
    assert verdicts == expected
|
|
|
|
|
|
# ── _nli_check (async, via claude_session) — fail-open + verdict mapping ──
|
|
|
|
def test_nli_check_fail_open(monkeypatch):
    """_nli_check answers "entailed" when the claude_session query raises."""
    import asyncio

    from legal_mcp.services import halacha_extractor as he

    async def _raise_no_cli(*args, **kwargs):
        raise RuntimeError("no claude CLI here")

    monkeypatch.setattr(he.claude_session, "query_json", _raise_no_cli)

    pairs = [{"rule_statement": "a", "supporting_quote": "b"}]
    verdicts = asyncio.run(he._nli_check(pairs))
    assert verdicts == ["entailed"]  # never blocks
|
|
|
|
|
|
def test_nli_check_maps_verdicts(monkeypatch):
    """_nli_check passes through the per-pair verdicts returned by the query."""
    import asyncio

    from legal_mcp.services import halacha_extractor as he

    async def _canned_verdicts(*args, **kwargs):
        return ["entailed", "neutral"]

    monkeypatch.setattr(he.claude_session, "query_json", _canned_verdicts)

    pairs = [
        {"rule_statement": "a", "supporting_quote": "b"},
        {"rule_statement": "c", "supporting_quote": "d"},
    ]
    verdicts = asyncio.run(he._nli_check(pairs))
    assert verdicts == ["entailed", "neutral"]
|
|
|
|
|
|
def test_nli_check_empty():
    """_nli_check on an empty item list returns an empty list."""
    import asyncio

    from legal_mcp.services import halacha_extractor as he

    verdicts = asyncio.run(he._nli_check([]))
    assert verdicts == []
|
|
|
|
|
|
# ── #81.5 consolidation — pure prompt + fold-group parser ──
|
|
|
|
def test_build_consolidation_prompt():
    """The consolidation prompt lists each rule by index plus its reasoning."""
    halachot = [
        {"halacha_index": 3, "rule_statement": "כלל גימל", "reasoning_summary": "כי"},
        {"halacha_index": 7, "rule_statement": "כלל זין", "reasoning_summary": ""},
    ]

    prompt = hq.build_consolidation_prompt(halachot)

    # Each rule is prefixed with its halacha_index in square brackets.
    assert "[3] כלל גימל" in prompt
    assert "[7] כלל זין" in prompt
    # The non-empty reasoning summary is rendered with its label.
    assert "היגיון: כי" in prompt
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ([[2, 5, 9], [14, 18]], [[2, 5, 9], [14, 18]]),
        ([[2, 5], [7]], [[2, 5]]),  # singleton group dropped
        ([["2", "5"]], [[2, 5]]),  # string ints coerced
        ([[2, 2, 5]], [[2, 5]]),  # dedup within group
        ([], []),  # nothing to fold
        ("garbage", []),  # non-list -> safe
        (None, []),  # None -> safe
        ([[1, "x"], [3, 4]], [[3, 4]]),  # drop group that falls below 2 valid
    ],
)
def test_parse_fold_groups(raw, expected):
    """parse_fold_groups cleans model output into valid multi-member groups."""
    groups = hq.parse_fold_groups(raw)
    assert groups == expected
|
|
|
|
|
|
def test_consolidation_priority_prefers_approved_then_confidence():
    """An approved halacha outranks a pending one with higher confidence."""
    from legal_mcp.services import halacha_extractor as he

    approved = {
        "id": "a",
        "review_status": "approved",
        "confidence": 0.7,
        "quote_verified": True,
        "rule_statement": "x",
    }
    pending_hi = {
        "id": "b",
        "review_status": "pending_review",
        "confidence": 0.95,
        "quote_verified": True,
        "rule_statement": "x",
    }

    # approved sorts before higher-confidence pending → kept as canonical
    candidates = [approved, pending_hi]
    canonical = min(candidates, key=he._consolidation_priority)
    assert canonical["id"] == "a"
|