feat(halacha): application gate + lexical dedup tail + quality harnesses (#81,#82)
Halacha-extraction quality (#81) and dedup-on-insert (#82) — engine changes (pure + tested) plus measurement/ops tooling.

halacha_quality.py
- #81.4 application gate: is_fact_dependent() (high-precision "applied to THIS case" deixis per the strict rubric §3/§27) + FLAG_APPLICATION. compute_quality_flags now takes rule_type and flags rule_type=='application' OR fact-dependent — blocking auto-approve (an illustration is not a generalizable holding).
- #82.3 lexical tail signal: jaccard_shingles / normalized_levenshtein / lexical_near_duplicate + FLAG_NEAR_DUPLICATE, for the 0.83–0.93 cosine band.

halacha_extractor.py — pass rule_type to the flag computation; re-type a binding-labeled fact-application to 'application' (mirrors non_decision→obiter).

db.py (store_halachot_for_chunk) — dedup now fetches the nearest same-precedent neighbor once: cosine ≥ DEDUP → skip (unchanged); cosine in [BAND, DEDUP) with high lexical overlap → FLAG_NEAR_DUPLICATE (review, not skip — never drop a possibly-distinct principle unreviewed).

config.py — HALACHA_DEDUP_BAND_COSINE (0.83).

Scripts:
- scripts/halacha_goldset.py (#81.7) — export stratified sample for human tagging; score validators (P/R/F1) against the tags. Backbone for #81.8.
- scripts/halacha_batch_reconcile.py (#82.7) — conservative cross-precedent dedup (cosine ≥0.95), dry-run report only.
- scripts/calibrate_halacha_dedup.py (#82.1) — calibrate the lexical thresholds against the 2026-06-03 cleanup gold-set.

Deferred (documented): #82.4 merge-provenance and #82.5 DB ON CONFLICT/UNIQUE on normalized quote are NOT included — the current skip+flag behavior is safe, whereas a UNIQUE on normalized_quote would fail on existing dups and a blind merge risks losing provenance; they need their own chair-reviewed migration. #82.6 over-merge guard is moot until merge lands.
#81.6 full rhetorical-role classifier deferred (section pre-filter + application flag cover the practical case); #81.8 blocked on the human-tagged gold-set (harness now provided).

Verified:
- pytest tests/test_halacha_quality.py — 52 passed (14 new).
- calibrate: configured (0.55,0.70) → precision 1.0 (zero false-merge), recall 0.30 — correct profile for an auto-approve-blocking signal.
- goldset export: 15-row sample CSV. batch reconcile: 819 halachot → 5 cross-precedent candidate pairs.

Invariants: G1 (normalize at source — flag at insert, not at read); §6 (no silent swallow — suspect items flagged to review, never dropped); G2 (no parallel path — same store_halachot_for_chunk / compute_quality_flags).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -181,3 +181,75 @@ def test_consolidation_priority_prefers_approved_then_confidence():
|
||||
"quote_verified": True, "rule_statement": "x"}
|
||||
# approved sorts before higher-confidence pending → kept as canonical
|
||||
assert min([approved, pending_hi], key=he._consolidation_priority)["id"] == "a"
|
||||
|
||||
|
||||
# ── #81.4 fact-dependent / application ──
|
||||
|
||||
@pytest.mark.parametrize(
    "rule",
    [
        "במקרה דנן ועדת הערר קבעה כי ההיתר בטל",
        "בענייננו אין הצדקה לפיצוי",
        "בערר שלפנינו הוכח כי השומה שגויה",
    ],
)
def test_is_fact_dependent_hits(rule):
    """Each parametrized rule text must be reported as fact-dependent.

    The identity check against ``True`` (not mere truthiness) pins the
    return value to the boolean singleton.
    """
    verdict = hq.is_fact_dependent(rule)
    assert verdict is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "rule",
    [
        "ועדת הערר מוסמכת לדון בהיטל השבחה",
        "נטל ההוכחה מוטל על המבקש",
        "פגיעה תכנונית מזכה בפיצוי לפי סעיף 197",
    ],
)
def test_is_fact_dependent_misses(rule):
    """Each parametrized rule text must NOT be reported as fact-dependent.

    The identity check against ``False`` rejects falsy non-bool returns
    such as ``None``.
    """
    verdict = hq.is_fact_dependent(rule)
    assert verdict is False
|
||||
|
||||
|
||||
def test_application_flag_from_rule_type():
    """``rule_type="application"`` must yield ``FLAG_APPLICATION``."""
    # The two positional texts are forwarded unchanged, in their original order.
    texts = ("נטל ההוכחה על המבקש", "נטל ההוכחה על המבקש כאמור")
    computed = hq.compute_quality_flags(*texts, rule_type="application")
    assert hq.FLAG_APPLICATION in computed
|
||||
|
||||
|
||||
def test_application_flag_from_deixis_even_if_binding():
    """``FLAG_APPLICATION`` must be raised even when ``rule_type="binding"``.

    Both positional texts contain the phrase "במקרה דנן"; the test expects
    the flag despite the binding label.
    """
    texts = ("במקרה דנן נדחה הערר", "כפי שקבענו במקרה דנן נדחה הערר")
    computed = hq.compute_quality_flags(*texts, rule_type="binding")
    assert hq.FLAG_APPLICATION in computed
|
||||
|
||||
|
||||
def test_clean_binding_rule_has_no_flags():
    """The binding-labeled input pair below must produce exactly ``[]``."""
    texts = (
        "ועדת הערר מוסמכת לדון בטענות חוקתיות הנוגעות לתכנית",
        "הוועדה מוסמכת לדון אף בטענות מסוג זה, ככל שהן נוגעות לתכנית שבנדון.",
    )
    computed = hq.compute_quality_flags(*texts, rule_type="binding")
    # Compared against an empty list literal, not merely checked for falsiness.
    assert computed == []
|
||||
|
||||
|
||||
# ── #82.3 lexical near-duplicate signal ──
|
||||
|
||||
def test_jaccard_high_for_reworded_same_rule():
    """Two texts differing only by a trailing word must score at least 0.5."""
    original = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית"
    reworded = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית בלבד"
    score = hq.jaccard_shingles(original, reworded)
    assert score >= 0.5
|
||||
|
||||
|
||||
def test_jaccard_low_for_distinct_rules():
    """Two differently worded rule texts must score strictly below 0.2."""
    jurisdiction_rule = "ועדת הערר מוסמכת לדון בהיטל השבחה"
    deadline_rule = "המועד להגשת ערר הוא שלושים יום"
    score = hq.jaccard_shingles(jurisdiction_rule, deadline_rule)
    assert score < 0.2
|
||||
|
||||
|
||||
def test_normalized_levenshtein_identical_and_disjoint():
    """Identical strings must score 1.0; empty versus non-empty must score 0.0."""
    # (left, right, expected) — evaluated in the same order as the original asserts.
    cases = (
        ("אבג", "אבג", 1.0),
        ("", "אבג", 0.0),
    )
    for left, right, expected in cases:
        assert hq.normalized_levenshtein(left, right) == expected
|
||||
|
||||
|
||||
def test_lexical_near_duplicate_band():
    """A text with an appended clause is a near-duplicate; a differently worded one is not."""
    base = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית"

    # Same text with a short trailing clause appended.
    extended = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקومית, כך נפסק".replace("ו" + "מ", "מ") if False else "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית, כך נפסק"
    is_dup = hq.lexical_near_duplicate(base, extended)
    assert is_dup is True

    # A differently worded rule text.
    unrelated = "המועד להגשת ערר על שומה הוא שלושים ימים"
    is_dup = hq.lexical_near_duplicate(base, unrelated)
    assert is_dup is False
|
||||
|
||||
Reference in New Issue
Block a user