fix(halacha): split authority (derived) from rule_role — stop source-conflation (INV-DM7)
The extractor classified rule_type by SOURCE bindingness (higher-court→binding, committee→persuasive) instead of by rule KIND. The gold-set proved it: 'binding' appeared on 19/19 external rulings & 0 committees; 'persuasive' on 13/13 committees & 0 external — only 58% agreement with the human role tags. The two axes (authority vs rule role) were crammed into one enum.

This splits them per INV-DM7:
- authority (binding/persuasive) — DERIVED from case_law.precedent_level (עליון/מנהלי→binding, ועדת_ערר_מחוזית→persuasive), never stored, never LLM-guessed. New helper halacha_quality.derive_authority; surfaced read-only in list_halachot / goldset_list / search results.
- rule_type — now the rule ROLE only: holding/interpretive/procedural/application/obiter. Both extractor prompts unified to this vocabulary; _coerce_halacha no longer defaults rule_type from the source; legacy binding→holding / persuasive→interpretive fold for safety.

UI: authority shown as a separate read-only badge (gold=מחייב / muted=משכנע) across the review queue, precedent detail, and gold-set; the gold-set role selector drops binding/persuasive and adds מהותי (holding).

Migration: scripts/halacha_rule_role_backfill.py re-classifies the 276 pre-split binding/persuasive rows into a genuine role via local claude_session (run after deploy). Gold-set correct_type/ai_correct_type 'binding'→'holding' via SQL.

Sources (≥3, per research-decision policy): OASIS LegalRuleML v1.0 (appliesAuthority/Strength as metadata orthogonal to rule logic) · SemEval-2023 Task 6 LegalEval (rhetorical roles by function, authority kept separate) · Bluebook signals (weight-of-authority is a separate dimension).

Invariants: ESTABLISHES INV-DM7. Upholds G1 (normalize at source — extractor classifies role, system derives authority) and G2 (single source of truth — authority derived, not a parallel stored field).

Tests: 211 pass + new derive_authority/coerce coverage. web-ui build + tsc clean.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
46
mcp-server/tests/test_halacha_coerce.py
Normal file
46
mcp-server/tests/test_halacha_coerce.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""rule_type coercion after the authority/role split (INV-DM7).
|
||||
|
||||
The extractor's rule_type holds the rule ROLE only — it is never defaulted from
|
||||
the source's bindingness. Legacy authority values fold to the nearest role.
|
||||
"""
|
||||
from legal_mcp.services.halacha_extractor import (
|
||||
_LEGACY_RULE_TYPE_FOLD,
|
||||
_VALID_RULE_TYPES,
|
||||
_coerce_halacha,
|
||||
)
|
||||
|
||||
_BASE = {"rule_statement": "כלל כלשהו", "supporting_quote": "ציטוט תומך כלשהו"}
|
||||
|
||||
|
||||
def _rt(rule_type):
    """Return the rule_type that _coerce_halacha yields for a minimal row.

    The row is _BASE (the two text fields) plus the given *rule_type*.
    """
    row = dict(_BASE, rule_type=rule_type)
    coerced = _coerce_halacha(row)
    return coerced["rule_type"]
|
||||
|
||||
|
||||
def test_valid_roles_are_the_five_roles_only():
    """_VALID_RULE_TYPES is exactly the five roles; authority values are absent."""
    expected_roles = {
        "holding",
        "interpretive",
        "procedural",
        "application",
        "obiter",
    }
    assert _VALID_RULE_TYPES == expected_roles
    # Spelled out separately so a regression names the offending legacy value.
    for authority_value in ("binding", "persuasive"):
        assert authority_value not in _VALID_RULE_TYPES
|
||||
|
||||
|
||||
def test_legacy_authority_values_fold_to_a_role():
    """Legacy 'binding'/'persuasive' rule_type values coerce to a role."""
    expected_fold = {"binding": "holding", "persuasive": "interpretive"}
    # Behaviour first: each legacy value coerces to its mapped role.
    for legacy_value, role in expected_fold.items():
        assert _rt(legacy_value) == role
    # Then the table itself: no extra or missing legacy entries.
    assert _LEGACY_RULE_TYPE_FOLD == expected_fold
|
||||
|
||||
|
||||
def test_genuine_roles_pass_through():
    """Each of the five role values is returned unchanged by coercion."""
    roles = ["holding", "interpretive", "procedural", "application", "obiter"]
    coerced = [_rt(role) for role in roles]
    assert coerced == roles
|
||||
|
||||
|
||||
def test_unknown_or_missing_defaults_to_interpretive():
    """An unrecognised or absent rule_type coerces to 'interpretive'."""
    fallback = "interpretive"
    assert _rt("nonsense") == fallback
    # _BASE carries no rule_type key at all.
    coerced_without_type = _coerce_halacha(_BASE)
    assert coerced_without_type["rule_type"] == fallback
|
||||
|
||||
|
||||
def test_coerce_rejects_rows_missing_required_fields():
    """_coerce_halacha returns None for rows lacking either text field, and for non-dicts."""
    rejected_inputs = (
        {"rule_statement": "x"},    # no supporting_quote
        {"supporting_quote": "y"},  # no rule_statement
        "not a dict",
    )
    for candidate in rejected_inputs:
        assert _coerce_halacha(candidate) is None
|
||||
@@ -211,23 +211,40 @@ def test_application_flag_from_rule_type():
|
||||
assert hq.FLAG_APPLICATION in flags
|
||||
|
||||
|
||||
def test_application_flag_from_deixis_even_if_holding():
    """FLAG_APPLICATION is raised for this case-specific text even with rule_type 'holding'.

    Both the rule statement and the supporting quote contain "במקרה דנן";
    the test name attributes the flag to that deixis rather than to rule_type.
    """
    flags = hq.compute_quality_flags(
        "במקרה דנן נדחה הערר", "כפי שקבענו במקרה דנן נדחה הערר",
        rule_type="holding",
    )
    assert hq.FLAG_APPLICATION in flags
|
||||
|
||||
|
||||
def test_clean_holding_rule_has_no_flags():
    """compute_quality_flags returns an empty list for this rule with rule_type 'holding'."""
    flags = hq.compute_quality_flags(
        "ועדת הערר מוסמכת לדון בטענות חוקתיות הנוגעות לתכנית",
        "הוועדה מוסמכת לדון אף בטענות מסוג זה, ככל שהן נוגעות לתכנית שבנדון.",
        rule_type="holding",
    )
    assert flags == []
|
||||
|
||||
|
||||
# ── INV-DM7: authority is DERIVED from the source, never a rule_type value ──
|
||||
|
||||
def test_derive_authority_binding_for_higher_courts():
    """derive_authority returns 'binding' for the 'עליון' and 'מנהלי' levels."""
    for precedent_level in ("עליון", "מנהלי"):
        assert hq.derive_authority(precedent_level) == "binding"
|
||||
|
||||
|
||||
def test_derive_authority_persuasive_for_committee():
    """derive_authority returns 'persuasive' for the 'ועדת_ערר_מחוזית' level."""
    authority = hq.derive_authority("ועדת_ערר_מחוזית")
    assert authority == "persuasive"
|
||||
|
||||
|
||||
def test_derive_authority_none_for_unknown_or_empty():
    """derive_authority returns None for an empty, None, or unrecognised level."""
    for precedent_level in ("", None, "משהו אחר"):
        assert hq.derive_authority(precedent_level) is None
|
||||
|
||||
|
||||
# ── #82.3 lexical near-duplicate signal ──
|
||||
|
||||
def test_jaccard_high_for_reworded_same_rule():
|
||||
|
||||
Reference in New Issue
Block a user