"""Tests for #82.4 / #82.6 — dedup-on-insert decision + over-merge guard. ``halacha_quality.dedup_action`` is the PAIRWISE decision a fresh halacha makes against its single nearest same-precedent neighbor: skip (semantic dup), flag (lexical tail), or keep. It compares to exactly ONE neighbor and only ever drops the *incoming* row, so a chain A~B~C can never collapse to one row — the over-merge guard (#82.6). Pure/offline. """ from __future__ import annotations import pytest from legal_mcp.services import halacha_quality as hq # operating point: DEDUP_COSINE=0.93 → dedup_distance=0.07 ; BAND=0.83 → 0.17 DEDUP_D = 1.0 - 0.93 BAND_D = 1.0 - 0.83 SIMILAR_A = "מיצוי הליכים הוא תנאי סף להגשת ערר לוועדה" SIMILAR_B = "מיצוי הליכים הוא תנאי סף להגשת הערר לוועדה" DIFFERENT = "מתחם שיקול הדעת התכנוני של הוועדה המקומית רחב" def test_skip_below_dedup_distance(): # cosine ≥ 0.93 (dist ≤ 0.07) → skip, regardless of wording assert hq.dedup_action(0.03, DIFFERENT, SIMILAR_A, DEDUP_D, BAND_D) == "skip" assert hq.dedup_action(0.05, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "skip" def test_flag_in_lexical_tail(): # in the 0.07–0.17 band AND lexically near → flag (not skip, not keep) assert hq.dedup_action(0.12, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "flag" def test_keep_in_tail_when_not_lexically_similar(): # in the band but lexically distinct → keep (don't flag a different rule) assert hq.dedup_action(0.12, DIFFERENT, SIMILAR_A, DEDUP_D, BAND_D) == "keep" def test_over_merge_guard_distinct_rule_kept(): """Beyond the band, even a lexically-similar rule is KEPT — and because the decision is pairwise (one neighbor, incoming-only drop), a chain A~B~C with A,C distinct never collapses to a single row (#82.6).""" assert hq.dedup_action(0.30, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "keep" assert hq.dedup_action(0.50, DIFFERENT, SIMILAR_A, DEDUP_D, BAND_D) == "keep" def test_boundary_exactly_at_band_edge(): # dist == band_distance is still within the tail (≤), lexical → flag assert hq.dedup_action(BAND_D, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "flag" # just past the band → keep assert hq.dedup_action(BAND_D + 0.001, SIMILAR_A, SIMILAR_B, DEDUP_D, BAND_D) == "keep"