Merge pull request 'feat(halacha): #82.4 provenance-union על dedup-skip + #82.6 over-merge guard' (#192) from worktree-halacha-dedup-provenance-guard into main
This commit was merged in pull request #192.
This commit is contained in:
53
mcp-server/tests/test_halacha_dedup_action.py
Normal file
53
mcp-server/tests/test_halacha_dedup_action.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""Tests for #82.4 / #82.6 — dedup-on-insert decision + over-merge guard.
|
||||
|
||||
``halacha_quality.dedup_action`` is the PAIRWISE decision a fresh halacha makes
against its single nearest same-precedent neighbor: skip (semantic duplicate),
flag (lexical tail), or keep. It compares to exactly ONE neighbor and only ever
drops the *incoming* row, so a chain A~B~C can never collapse to one row — the
over-merge guard (#82.6). Pure/offline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp.services import halacha_quality as hq
|
||||
|
||||
# Operating point used by every test below, expressed as cosine *distance*
# (distance = 1 - cosine similarity):
#   DEDUP_COSINE = 0.93  ->  dedup distance ~ 0.07
#   BAND         = 0.83  ->  band distance  ~ 0.17
# The values are kept as ``1.0 - x`` expressions (not rounded literals) so the
# thresholds carry the same floating-point value wherever they are reused.
DEDUP_D = 1.0 - 0.93
BAND_D = 1.0 - 0.83

# Hebrew fixtures. SIMILAR_A and SIMILAR_B are the same sentence except for a
# single added letter ("ערר" vs "הערר"), so they serve as the lexically-near
# pair; DIFFERENT is an unrelated sentence used as the lexically-distinct text.
SIMILAR_A = "מיצוי הליכים הוא תנאי סף להגשת ערר לוועדה"
SIMILAR_B = "מיצוי הליכים הוא תנאי סף להגשת הערר לוועדה"
DIFFERENT = "מתחם שיקול הדעת התכנוני של הוועדה המקומית רחב"
|
||||
|
||||
|
||||
def test_skip_below_dedup_distance() -> None:
    """Distances under the dedup threshold yield ``"skip"`` for any wording."""
    # cosine ≥ 0.93 (dist ≤ 0.07) → skip, regardless of wording:
    # first an unrelated pair of texts, then a near-identical pair.
    assert hq.dedup_action(0.03, DIFFERENT, SIMILAR_A, DEDUP_D, BAND_D) == "skip"
    assert hq.dedup_action(0.05, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "skip"
|
||||
|
||||
|
||||
def test_flag_in_lexical_tail() -> None:
    """A lexically-near pair inside the 0.07–0.17 band yields ``"flag"``."""
    # in the 0.07–0.17 band AND lexically near → flag (not skip, not keep)
    assert hq.dedup_action(0.12, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "flag"
|
||||
|
||||
|
||||
def test_keep_in_tail_when_not_lexically_similar() -> None:
    """A lexically-distinct pair inside the band yields ``"keep"``."""
    # in the band but lexically distinct → keep (don't flag a different rule)
    assert hq.dedup_action(0.12, DIFFERENT, SIMILAR_A, DEDUP_D, BAND_D) == "keep"
|
||||
|
||||
|
||||
def test_over_merge_guard_distinct_rule_kept() -> None:
    """Beyond the band, even a lexically-similar rule is KEPT — and because the
    decision is pairwise (one neighbor, incoming-only drop), a chain A~B~C with
    A,C distinct never collapses to a single row (#82.6)."""
    # Past the band the wording no longer matters: both the near-identical
    # pair and the unrelated pair must come back as "keep".
    assert hq.dedup_action(0.30, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "keep"
    assert hq.dedup_action(0.50, DIFFERENT, SIMILAR_A, DEDUP_D, BAND_D) == "keep"
|
||||
|
||||
|
||||
def test_boundary_exactly_at_band_edge() -> None:
    """Pin the inclusive upper edge of the band for a lexically-near pair."""
    # dist == band_distance is still within the tail (≤), lexical → flag.
    # The very same BAND_D float is passed as both distance and threshold, so
    # the comparison is exact and not subject to rounding.
    assert hq.dedup_action(BAND_D, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "flag"
    # just past the band → keep
    assert hq.dedup_action(BAND_D + 0.001, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "keep"
|
||||
Reference in New Issue
Block a user