The extractor classified rule_type by SOURCE bindingness (higher-court→binding, committee→persuasive) instead of by rule KIND. The gold-set proved it: 'binding' appeared on 19/19 external rulings & 0 committees; 'persuasive' on 13/13 committees & 0 external — only 58% agreement with the human role tags. The two axes (authority vs rule role) were crammed into one enum.

This splits them per INV-DM7:
- authority (binding/persuasive) — DERIVED from case_law.precedent_level (עליון/מנהלי→binding, ועדת_ערר_מחוזית→persuasive), never stored, never LLM-guessed. New helper halacha_quality.derive_authority; surfaced read-only in list_halachot / goldset_list / search results.
- rule_type — now the rule ROLE only: holding/interpretive/procedural/application/obiter. Both extractor prompts unified to this vocabulary; _coerce_halacha no longer defaults rule_type from the source; legacy values are folded (binding→holding / persuasive→interpretive) for safety.

UI: authority shown as a separate read-only badge (gold=מחייב / muted=משכנע) across the review queue, precedent detail, and gold-set; the gold-set role selector drops binding/persuasive and adds מהותי (holding).

Migration: scripts/halacha_rule_role_backfill.py re-classifies the 276 pre-split binding/persuasive rows into a genuine role via local claude_session (run after deploy). Gold-set correct_type/ai_correct_type 'binding'→'holding' via SQL.

Sources (≥3, per research-decision policy): OASIS LegalRuleML v1.0 (appliesAuthority/Strength as metadata orthogonal to rule logic) · SemEval-2023 Task 6 LegalEval (rhetorical roles by function, authority kept separate) · Bluebook signals (weight-of-authority is a separate dimension).

Invariants: ESTABLISHES INV-DM7. Upholds G1 (normalize at source — extractor classifies role, system derives authority) and G2 (single source of truth — authority derived, not a parallel stored field).

Tests: 211 pass + new derive_authority/coerce coverage. web-ui build + tsc clean.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
273 lines
11 KiB
Python
273 lines
11 KiB
Python
from __future__ import annotations
|
||
|
||
import pytest
|
||
|
||
from legal_mcp.services import halacha_quality as hq
|
||
|
||
|
||
# ── non-decision / obiter ──
|
||
|
||
@pytest.mark.parametrize(
    "text",
    [
        "איני רואה לקבוע מסמרות בשאלה זו",
        "אין צורך להכריע בטענה זו",
        "למעלה מן הצורך נעיר כי",
        "הערה זו ניתנת אגב אורחא",
    ],
)
def test_detect_non_decision_hits(text):
    """Each sample phrase must yield a non-None result from ``detect_non_decision``."""
    detected = hq.detect_non_decision(text)
    assert detected is not None
|
||
|
||
|
||
@pytest.mark.parametrize(
    "text",
    [
        "בית המשפט קבע כי ההיתר בטל",
        "ועדת הערר מוסמכת לדון בטענת סטייה מתכנית",
        "",
    ],
)
def test_detect_non_decision_misses(text):
    """Plain holdings and the empty string must yield ``None`` from ``detect_non_decision``."""
    detected = hq.detect_non_decision(text)
    assert detected is None
|
||
|
||
|
||
def test_non_decision_scans_all_fields():
    """A marker that appears only in a later argument must still be detected."""
    # The marker phrase is placed in the quote (third argument); the
    # abstracted rule (first argument) does not contain it.
    rule_text = "כלל כללי"
    empty_field = ""
    quote_text = "וכאן אין צורך להכריע"
    detected = hq.detect_non_decision(rule_text, empty_field, quote_text)
    assert detected is not None
|
||
|
||
|
||
# ── truncated quote ──
|
||
|
||
def test_truncated_dangling_letter():
    """A quote ending in a lone dangling letter must be reported as truncated."""
    dangling_quote = "ראוי כי תהיה השפעה על ה"
    assert hq.is_quote_truncated(dangling_quote) is True
|
||
|
||
|
||
def test_truncated_empty():
    """A whitespace-only quote must be reported as truncated."""
    blank_quote = " "
    assert hq.is_quote_truncated(blank_quote) is True
|
||
|
||
|
||
@pytest.mark.parametrize(
    "quote",
    [
        "ועדת הערר היא הגוף המקצועי האמון על בחינת ההיבטים התכנוניים.",
        # No closing period, but the last token is a full word.
        "אין לועדה סמכות לסטות מתקנות התכנון והבניה",
        "ההיתר תואם את התכנית החלה על האיזור",
    ],
)
def test_not_truncated_complete_clauses(quote):
    """Complete clauses must not be reported as truncated."""
    truncated = hq.is_quote_truncated(quote)
    assert truncated is False
|
||
|
||
|
||
# ── thin restatement ──
|
||
|
||
def test_thin_restatement_near_copy():
    """A rule that repeats its quote verbatim must be flagged as a thin restatement."""
    # Rule and quote carry the same text, so one literal serves both arguments.
    shared_text = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    assert hq.is_thin_restatement(shared_text, shared_text) is True
|
||
|
||
|
||
def test_not_thin_when_abstracted():
    """A rule that is much longer and reworded relative to its quote must not be flagged thin."""
    rule = (
        "ועדה מקומית לתכנון ובניה אינה מוסמכת לסטות מהוראות תקנות התכנון "
        "והבניה, ובכלל זה מהוראות התוספת השנייה, ואין בידה ליתן היתר הסוטה מהן."
    )
    quote = "אין חולק כי אין לועדה סמכות לסטות מתקנות"
    is_thin = hq.is_thin_restatement(rule, quote)
    assert is_thin is False
|
||
|
||
|
||
def test_thin_handles_empty():
    """An empty rule paired with a non-empty quote must not be flagged thin."""
    is_thin = hq.is_thin_restatement("", "something")
    assert is_thin is False
|
||
|
||
|
||
# ── aggregate flags + auto-approve gate semantics ──
|
||
|
||
def test_clean_halacha_no_flags():
    """A well-formed rule with a verified quote must produce an empty flag list."""
    rule = (
        "ועדת הערר מוסמכת לדון בערר על החלטה ליתן היתר בנייה גם כאשר נטען "
        "כי ההיתר סוטה מתכנית, בהתאם למגמת תיקון 43 לחוק."
    )
    quote = (
        "פרשנות מרחיבה המאפשרת הגשת ערר גם במקרה של מתן היתר כאשר נטען כי "
        "ההיתר סוטה מתכנית הולמת את מגמת המחוקק בתיקון 43."
    )
    flags = hq.compute_quality_flags(rule, quote, "", quote_verified=True)
    assert flags == []
|
||
|
||
|
||
def test_flags_accumulate():
    """Several independent problems in one item must each contribute their own flag."""
    # The same text is passed as both rule and quote, with the quote unverified.
    sample = "כלל אגב אורחא על ה"
    flags = hq.compute_quality_flags(sample, sample, quote_verified=False)
    expected_flags = (
        hq.FLAG_NON_DECISION,
        hq.FLAG_TRUNCATED_QUOTE,
        hq.FLAG_QUOTE_UNVERIFIED,
    )
    for flag in expected_flags:
        assert flag in flags
|
||
|
||
|
||
def test_normalize_text_quote_variants():
    """ASCII double quote and Hebrew gershayim must normalize to the same text."""
    with_ascii_quote = hq.normalize_text('עע"מ 317/10')
    with_gershayim = hq.normalize_text("עע״מ 317/10")
    assert with_ascii_quote == with_gershayim
|
||
|
||
|
||
# ── #81.3 NLI entailment — pure prompt + parser ──
|
||
|
||
def test_build_nli_prompt_contains_pairs():
    """The NLI prompt must embed the item texts and a numbered label for each pair."""
    pairs = [
        {"rule_statement": "כלל אלף", "supporting_quote": "ציטוט אלף"},
        {"rule_statement": "כלל בית", "supporting_quote": "ציטוט בית"},
    ]
    prompt = hq.build_nli_prompt(pairs)
    # Fragments are checked in the same order as the original chained asserts.
    for fragment in ("כלל אלף", "ציטוט בית", "זוג 1", "זוג 2"):
        assert fragment in prompt
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("raw", "n", "expected"),
    [
        (["entailed", "neutral"], 2, ["entailed", "neutral"]),
        # case-insensitive
        (["ENTAILED", "Contradiction"], 2, ["entailed", "contradiction"]),
        # dict shape
        ([{"verdict": "neutral"}, {"verdict": "entailed"}], 2, ["neutral", "entailed"]),
        # length mismatch -> fail-open
        (["entailed"], 2, ["entailed", "entailed"]),
        # non-list -> fail-open
        (None, 2, ["entailed", "entailed"]),
        # unknown label -> entailed
        (["bananas", "neutral"], 2, ["entailed", "neutral"]),
    ],
)
def test_parse_nli_verdicts(raw, n, expected):
    """``parse_nli_verdicts(raw, n)`` must equal ``expected`` for every listed case."""
    verdicts = hq.parse_nli_verdicts(raw, n)
    assert verdicts == expected
|
||
|
||
|
||
# ── _nli_check (async, via claude_session) — fail-open + verdict mapping ──
|
||
|
||
def test_nli_check_fail_open(monkeypatch):
    """When ``claude_session.query_json`` raises, ``_nli_check`` must still return ``["entailed"]``."""
    import asyncio

    from legal_mcp.services import halacha_extractor as he

    async def _always_fails(*args, **kwargs):
        raise RuntimeError("no claude CLI here")

    monkeypatch.setattr(he.claude_session, "query_json", _always_fails)

    pairs = [{"rule_statement": "a", "supporting_quote": "b"}]
    verdicts = asyncio.run(he._nli_check(pairs))
    # The check must never block: a backend failure is treated as "entailed".
    assert verdicts == ["entailed"]
|
||
|
||
|
||
def test_nli_check_maps_verdicts(monkeypatch):
    """Verdicts returned by the patched ``query_json`` must come back from ``_nli_check`` in order."""
    import asyncio

    from legal_mcp.services import halacha_extractor as he

    async def _canned_verdicts(*args, **kwargs):
        return ["entailed", "neutral"]

    monkeypatch.setattr(he.claude_session, "query_json", _canned_verdicts)

    pairs = [
        {"rule_statement": "a", "supporting_quote": "b"},
        {"rule_statement": "c", "supporting_quote": "d"},
    ]
    verdicts = asyncio.run(he._nli_check(pairs))
    assert verdicts == ["entailed", "neutral"]
|
||
|
||
|
||
def test_nli_check_empty():
    """An empty item list must produce an empty verdict list."""
    import asyncio

    from legal_mcp.services import halacha_extractor as he

    verdicts = asyncio.run(he._nli_check([]))
    assert verdicts == []
|
||
|
||
|
||
# ── #81.5 consolidation — pure prompt + fold-group parser ──
|
||
|
||
def test_build_consolidation_prompt():
    """The consolidation prompt must list each rule by its index and include a non-empty reasoning."""
    candidates = [
        {"halacha_index": 3, "rule_statement": "כלל גימל", "reasoning_summary": "כי"},
        {"halacha_index": 7, "rule_statement": "כלל זין", "reasoning_summary": ""},
    ]
    prompt = hq.build_consolidation_prompt(candidates)
    # Fragments are checked in the same order as the original chained condition.
    for fragment in ("[3] כלל גימל", "[7] כלל זין", "היגיון: כי"):
        assert fragment in prompt
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ([[2, 5, 9], [14, 18]], [[2, 5, 9], [14, 18]]),
        # singleton group dropped
        ([[2, 5], [7]], [[2, 5]]),
        # string ints coerced
        ([["2", "5"]], [[2, 5]]),
        # dedup within group
        ([[2, 2, 5]], [[2, 5]]),
        # nothing to fold
        ([], []),
        # non-list -> safe
        ("garbage", []),
        # None -> safe
        (None, []),
        # drop group that falls below 2 valid
        ([[1, "x"], [3, 4]], [[3, 4]]),
    ],
)
def test_parse_fold_groups(raw, expected):
    """``parse_fold_groups(raw)`` must equal ``expected`` for every listed case."""
    groups = hq.parse_fold_groups(raw)
    assert groups == expected
|
||
|
||
|
||
def test_consolidation_priority_prefers_approved_then_confidence():
    """An approved item must rank ahead of a pending item with higher confidence."""
    from legal_mcp.services import halacha_extractor as he

    # Fields shared by both candidates; only id, status and confidence differ.
    shared = {"quote_verified": True, "rule_statement": "x"}
    approved = {"id": "a", "review_status": "approved", "confidence": 0.7, **shared}
    pending_hi = {
        "id": "b",
        "review_status": "pending_review",
        "confidence": 0.95,
        **shared,
    }

    # The lowest priority key is the one kept as canonical.
    canonical = min([approved, pending_hi], key=he._consolidation_priority)
    assert canonical["id"] == "a"
|
||
|
||
|
||
# ── #81.4 fact-dependent / application ──
|
||
|
||
@pytest.mark.parametrize(
    "rule",
    [
        "במקרה דנן ועדת הערר קבעה כי ההיתר בטל",
        "בענייננו אין הצדקה לפיצוי",
        "בערר שלפנינו הוכח כי השומה שגויה",
    ],
)
def test_is_fact_dependent_hits(rule):
    """Rules phrased around the case at hand must be reported as fact-dependent."""
    fact_dependent = hq.is_fact_dependent(rule)
    assert fact_dependent is True
|
||
|
||
|
||
@pytest.mark.parametrize(
    "rule",
    [
        "ועדת הערר מוסמכת לדון בהיטל השבחה",
        "נטל ההוכחה מוטל על המבקש",
        "פגיעה תכנונית מזכה בפיצוי לפי סעיף 197",
    ],
)
def test_is_fact_dependent_misses(rule):
    """Generally phrased rules must not be reported as fact-dependent."""
    fact_dependent = hq.is_fact_dependent(rule)
    assert fact_dependent is False
|
||
|
||
|
||
def test_application_flag_from_rule_type():
    """Passing ``rule_type="application"`` must yield the application flag."""
    rule = "נטל ההוכחה על המבקש"
    quote = "נטל ההוכחה על המבקש כאמור"
    flags = hq.compute_quality_flags(rule, quote, rule_type="application")
    assert hq.FLAG_APPLICATION in flags
|
||
|
||
|
||
def test_application_flag_from_deixis_even_if_holding():
    """Case-specific wording must yield the application flag even when ``rule_type`` is "holding"."""
    rule = "במקרה דנן נדחה הערר"
    quote = "כפי שקבענו במקרה דנן נדחה הערר"
    flags = hq.compute_quality_flags(rule, quote, rule_type="holding")
    assert hq.FLAG_APPLICATION in flags
|
||
|
||
|
||
def test_clean_holding_rule_has_no_flags():
    """A general holding with a complete supporting quote must produce no flags."""
    rule = "ועדת הערר מוסמכת לדון בטענות חוקתיות הנוגעות לתכנית"
    quote = "הוועדה מוסמכת לדון אף בטענות מסוג זה, ככל שהן נוגעות לתכנית שבנדון."
    flags = hq.compute_quality_flags(rule, quote, rule_type="holding")
    assert flags == []
|
||
|
||
|
||
# ── INV-DM7: authority is DERIVED from the source, never a rule_type value ──
|
||
|
||
def test_derive_authority_binding_for_higher_courts():
    """Both higher-court precedent levels must derive to "binding"."""
    for precedent_level in ("עליון", "מנהלי"):
        assert hq.derive_authority(precedent_level) == "binding"
|
||
|
||
|
||
def test_derive_authority_persuasive_for_committee():
    """The district appeals-committee precedent level must derive to "persuasive"."""
    authority = hq.derive_authority("ועדת_ערר_מחוזית")
    assert authority == "persuasive"
|
||
|
||
|
||
def test_derive_authority_none_for_unknown_or_empty():
    """Empty, missing, or unrecognized precedent levels must derive to ``None``."""
    for precedent_level in ("", None, "משהו אחר"):
        assert hq.derive_authority(precedent_level) is None
|
||
|
||
|
||
# ── #82.3 lexical near-duplicate signal ──
|
||
|
||
def test_jaccard_high_for_reworded_same_rule():
    """Two statements differing by one trailing word must score at least 0.5."""
    base = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית"
    extended = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית בלבד"
    score = hq.jaccard_shingles(base, extended)
    assert score >= 0.5
|
||
|
||
|
||
def test_jaccard_low_for_distinct_rules():
    """Two unrelated statements must score below 0.2."""
    jurisdiction_rule = "ועדת הערר מוסמכת לדון בהיטל השבחה"
    deadline_rule = "המועד להגשת ערר הוא שלושים יום"
    score = hq.jaccard_shingles(jurisdiction_rule, deadline_rule)
    assert score < 0.2
|
||
|
||
|
||
def test_normalized_levenshtein_identical_and_disjoint():
    """Identical strings must score 1.0; an empty string against a non-empty one must score 0.0."""
    sample = "אבג"
    identical_score = hq.normalized_levenshtein(sample, "אבג")
    assert identical_score == 1.0
    empty_vs_sample_score = hq.normalized_levenshtein("", sample)
    assert empty_vs_sample_score == 0.0
|
||
|
||
|
||
def test_lexical_near_duplicate_band():
    """A lightly extended rule must count as a near-duplicate; an unrelated rule must not."""
    base = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית"

    with_suffix = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית, כך נפסק"
    assert hq.lexical_near_duplicate(base, with_suffix) is True

    unrelated = "המועד להגשת ערר על שומה הוא שלושים ימים"
    assert hq.lexical_near_duplicate(base, unrelated) is False
|