# legal-ai/mcp-server/tests/test_halacha_quality.py

from __future__ import annotations

import pytest

from legal_mcp.services import halacha_quality as hq


# ── non-decision / obiter ──

@pytest.mark.parametrize(
    "text",
    [
        "איני רואה לקבוע מסמרות בשאלה זו",
        "אין צורך להכריע בטענה זו",
        "למעלה מן הצורך נעיר כי",
        "הערה זו ניתנת אגב אורחא",
    ],
)
def test_detect_non_decision_hits(text):
    """Each listed phrasing must yield a non-None result from detect_non_decision."""
    detection = hq.detect_non_decision(text)
    assert detection is not None


@pytest.mark.parametrize(
    "text",
    [
        "בית המשפט קבע כי ההיתר בטל",
        "ועדת הערר מוסמכת לדון בטענת סטייה מתכנית",
        "",
    ],
)
def test_detect_non_decision_misses(text):
    """Texts without a marker (including the empty string) must yield None."""
    detection = hq.detect_non_decision(text)
    assert detection is None


def test_non_decision_scans_all_fields():
    """A marker found only in the third positional field must still be detected."""
    # The marker lives in the quote (last field), not in the abstracted rule.
    fields = ("כלל כללי", "", "וכאן אין צורך להכריע")
    detection = hq.detect_non_decision(*fields)
    assert detection is not None


# ── truncated quote ──

def test_truncated_dangling_letter():
    """A quote ending in a lone trailing letter is reported as truncated."""
    dangling_quote = "ראוי כי תהיה השפעה על ה"
    verdict = hq.is_quote_truncated(dangling_quote)
    assert verdict is True


def test_truncated_empty():
    """A whitespace-only quote is reported as truncated."""
    blank_quote = "   "
    verdict = hq.is_quote_truncated(blank_quote)
    assert verdict is True


@pytest.mark.parametrize(
    "quote",
    [
        "ועדת הערר היא הגוף המקצועי האמון על בחינת ההיבטים התכנוניים.",
        # no period, but full word
        "אין לועדה סמכות לסטות מתקנות התכנון והבניה",
        "ההיתר תואם את התכנית החלה על האיזור",
    ],
)
def test_not_truncated_complete_clauses(quote):
    """Complete clauses, with or without a closing period, are not truncated."""
    verdict = hq.is_quote_truncated(quote)
    assert verdict is False


# ── thin restatement ──

def test_thin_restatement_near_copy():
    """A rule that is textually identical to its quote counts as a thin restatement."""
    rule = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    quote = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    verdict = hq.is_thin_restatement(rule, quote)
    assert verdict is True


def test_not_thin_when_abstracted():
    """A rule that is much longer and reworded relative to the quote is not thin."""
    rule = (
        "ועדה מקומית לתכנון ובניה אינה מוסמכת לסטות מהוראות תקנות התכנון "
        "והבניה, ובכלל זה מהוראות התוספת השנייה, ואין בידה ליתן היתר הסוטה מהן."
    )
    quote = "אין חולק כי אין לועדה סמכות לסטות מתקנות"
    verdict = hq.is_thin_restatement(rule, quote)
    assert verdict is False


def test_thin_handles_empty():
    """An empty rule paired with a non-empty quote yields False."""
    verdict = hq.is_thin_restatement("", "something")
    assert verdict is False


# ── aggregate flags + auto-approve gate semantics ──

def test_clean_halacha_no_flags():
    """A verified quote with a distinct rule produces an empty flag list."""
    quote = ("פרשנות מרחיבה המאפשרת הגשת ערר גם במקרה של מתן היתר כאשר נטען כי "
             "ההיתר סוטה מתכנית הולמת את מגמת המחוקק בתיקון 43.")
    rule = ("ועדת הערר מוסמכת לדון בערר על החלטה ליתן היתר בנייה גם כאשר נטען "
            "כי ההיתר סוטה מתכנית, בהתאם למגמת תיקון 43 לחוק.")
    flags = hq.compute_quality_flags(rule, quote, "", quote_verified=True)
    assert flags == []


def test_flags_accumulate():
    """One input can raise several flags at once; all three must be present."""
    rule = "כלל אגב אורחא על ה"
    quote = "כלל אגב אורחא על ה"
    raised = hq.compute_quality_flags(rule, quote, quote_verified=False)
    assert hq.FLAG_NON_DECISION in raised
    assert hq.FLAG_TRUNCATED_QUOTE in raised
    assert hq.FLAG_QUOTE_UNVERIFIED in raised


def test_normalize_text_quote_variants():
    """An ASCII-quote, multi-space citation normalizes the same as its gershayim form."""
    ascii_variant = hq.normalize_text('עע"מ   317/10')
    gershayim_variant = hq.normalize_text("עע״מ 317/10")
    assert ascii_variant == gershayim_variant


# ── #81.3 NLI entailment — pure prompt + parser ──

def test_build_nli_prompt_contains_pairs():
    """The NLI prompt must embed the item texts and a numbered label per pair."""
    pairs = [("כלל אלף", "ציטוט אלף"), ("כלל בית", "ציטוט בית")]
    items = [
        {"rule_statement": rule, "supporting_quote": quote}
        for rule, quote in pairs
    ]
    prompt = hq.build_nli_prompt(items)
    assert "כלל אלף" in prompt
    assert "ציטוט בית" in prompt
    assert "זוג 1" in prompt
    assert "זוג 2" in prompt


@pytest.mark.parametrize(
    ("raw", "n", "expected"),
    [
        (["entailed", "neutral"], 2, ["entailed", "neutral"]),
        # case-insensitive
        (["ENTAILED", "Contradiction"], 2, ["entailed", "contradiction"]),
        # dict shape
        ([{"verdict": "neutral"}, {"verdict": "entailed"}], 2, ["neutral", "entailed"]),
        # length mismatch -> fail-open
        (["entailed"], 2, ["entailed", "entailed"]),
        # non-list -> fail-open
        (None, 2, ["entailed", "entailed"]),
        # unknown label -> entailed
        (["bananas", "neutral"], 2, ["entailed", "neutral"]),
    ],
)
def test_parse_nli_verdicts(raw, n, expected):
    """parse_nli_verdicts(raw, n) must return the expected verdict list for each case."""
    parsed = hq.parse_nli_verdicts(raw, n)
    assert parsed == expected


# ── _nli_check (async, via claude_session) — fail-open + verdict mapping ──

def test_nli_check_fail_open(monkeypatch):
    """When query_json raises, _nli_check still returns "entailed" for the item."""
    import asyncio
    from legal_mcp.services import halacha_extractor as he

    async def _always_raises(*args, **kwargs):
        raise RuntimeError("no claude CLI here")

    monkeypatch.setattr(he.claude_session, "query_json", _always_raises)
    single_item = [{"rule_statement": "a", "supporting_quote": "b"}]
    verdicts = asyncio.run(he._nli_check(single_item))
    # The failure must never block: the verdict falls back to "entailed".
    assert verdicts == ["entailed"]


def test_nli_check_maps_verdicts(monkeypatch):
    """Verdicts returned by the stubbed query_json come back from _nli_check in order."""
    import asyncio
    from legal_mcp.services import halacha_extractor as he

    async def _canned_reply(*args, **kwargs):
        return ["entailed", "neutral"]

    monkeypatch.setattr(he.claude_session, "query_json", _canned_reply)
    items = [
        {"rule_statement": "a", "supporting_quote": "b"},
        {"rule_statement": "c", "supporting_quote": "d"},
    ]
    verdicts = asyncio.run(he._nli_check(items))
    assert verdicts == ["entailed", "neutral"]


def test_nli_check_empty():
    """An empty item list yields an empty verdict list."""
    import asyncio
    from legal_mcp.services import halacha_extractor as he

    verdicts = asyncio.run(he._nli_check([]))
    assert verdicts == []


# ── #81.5 consolidation — pure prompt + fold-group parser ──

def test_build_consolidation_prompt():
    """The consolidation prompt lists each rule as "[index] text" and shows a reasoning line."""
    rows = [(3, "כלל גימל", "כי"), (7, "כלל זין", "")]
    items = [
        {"halacha_index": index, "rule_statement": rule, "reasoning_summary": reasoning}
        for index, rule, reasoning in rows
    ]
    prompt = hq.build_consolidation_prompt(items)
    assert "[3] כלל גימל" in prompt
    assert "[7] כלל זין" in prompt
    assert "היגיון: כי" in prompt


@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ([[2, 5, 9], [14, 18]], [[2, 5, 9], [14, 18]]),
        # singleton group dropped
        ([[2, 5], [7]], [[2, 5]]),
        # string ints coerced
        ([["2", "5"]], [[2, 5]]),
        # dedup within group
        ([[2, 2, 5]], [[2, 5]]),
        # nothing to fold
        ([], []),
        # non-list -> safe
        ("garbage", []),
        # None -> safe
        (None, []),
        # drop group that falls below 2 valid
        ([[1, "x"], [3, 4]], [[3, 4]]),
    ],
)
def test_parse_fold_groups(raw, expected):
    """parse_fold_groups(raw) must return the expected fold groups for each case."""
    groups = hq.parse_fold_groups(raw)
    assert groups == expected


def test_consolidation_priority_prefers_approved_then_confidence():
    """The approved record must win min() by priority over a higher-confidence pending one."""
    from legal_mcp.services import halacha_extractor as he

    approved = {
        "id": "a",
        "review_status": "approved",
        "confidence": 0.7,
        "quote_verified": True,
        "rule_statement": "x",
    }
    pending_hi = {
        "id": "b",
        "review_status": "pending_review",
        "confidence": 0.95,
        "quote_verified": True,
        "rule_statement": "x",
    }
    # approved sorts before higher-confidence pending → kept as canonical
    canonical = min([approved, pending_hi], key=he._consolidation_priority)
    assert canonical["id"] == "a"


# ── #81.4 fact-dependent / application ──

@pytest.mark.parametrize(
    "rule",
    [
        "במקרה דנן ועדת הערר קבעה כי ההיתר בטל",
        "בענייננו אין הצדקה לפיצוי",
        "בערר שלפנינו הוכח כי השומה שגויה",
    ],
)
def test_is_fact_dependent_hits(rule):
    """Each listed rule must be classified as fact-dependent."""
    verdict = hq.is_fact_dependent(rule)
    assert verdict is True


@pytest.mark.parametrize(
    "rule",
    [
        "ועדת הערר מוסמכת לדון בהיטל השבחה",
        "נטל ההוכחה מוטל על המבקש",
        "פגיעה תכנונית מזכה בפיצוי לפי סעיף 197",
    ],
)
def test_is_fact_dependent_misses(rule):
    """Each listed rule must not be classified as fact-dependent."""
    verdict = hq.is_fact_dependent(rule)
    assert verdict is False


def test_application_flag_from_rule_type():
    """Passing rule_type="application" must raise the application flag."""
    rule = "נטל ההוכחה על המבקש"
    quote = "נטל ההוכחה על המבקש כאמור"
    raised = hq.compute_quality_flags(rule, quote, rule_type="application")
    assert hq.FLAG_APPLICATION in raised


def test_application_flag_from_deixis_even_if_holding():
    """The application flag must be raised for this rule even with rule_type="holding"."""
    rule = "במקרה דנן נדחה הערר"
    quote = "כפי שקבענו במקרה דנן נדחה הערר"
    raised = hq.compute_quality_flags(rule, quote, rule_type="holding")
    assert hq.FLAG_APPLICATION in raised


def test_clean_holding_rule_has_no_flags():
    """This rule/quote pair with rule_type="holding" must produce no flags."""
    rule = "ועדת הערר מוסמכת לדון בטענות חוקתיות הנוגעות לתכנית"
    quote = "הוועדה מוסמכת לדון אף בטענות מסוג זה, ככל שהן נוגעות לתכנית שבנדון."
    raised = hq.compute_quality_flags(rule, quote, rule_type="holding")
    assert raised == []


# ── INV-DM7: authority is DERIVED from the source, never a rule_type value ──

def test_derive_authority_binding_for_higher_courts():
    """Both listed court labels must map to "binding"."""
    for court in ("עליון", "מנהלי"):
        assert hq.derive_authority(court) == "binding"


def test_derive_authority_persuasive_for_committee():
    """The district appeals-committee label must map to "persuasive"."""
    authority = hq.derive_authority("ועדת_ערר_מחוזית")
    assert authority == "persuasive"


def test_derive_authority_none_for_unknown_or_empty():
    """Empty, None and unrecognized source labels must all map to None."""
    for source in ("", None, "משהו אחר"):
        assert hq.derive_authority(source) is None


# ── #82.3 lexical near-duplicate signal ──

def test_jaccard_high_for_reworded_same_rule():
    """Two rules differing by one trailing word must score at least 0.5."""
    base_rule = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית"
    extended_rule = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית בלבד"
    score = hq.jaccard_shingles(base_rule, extended_rule)
    assert score >= 0.5


def test_jaccard_low_for_distinct_rules():
    """Two unrelated rules must score below 0.2."""
    first_rule = "ועדת הערר מוסמכת לדון בהיטל השבחה"
    second_rule = "המועד להגשת ערר הוא שלושים יום"
    score = hq.jaccard_shingles(first_rule, second_rule)
    assert score < 0.2


def test_normalized_levenshtein_identical_and_disjoint():
    """Identical strings score 1.0; an empty string against a non-empty one scores 0.0."""
    same_score = hq.normalized_levenshtein("אבג", "אבג")
    assert same_score == 1.0
    empty_score = hq.normalized_levenshtein("", "אבג")
    assert empty_score == 0.0


def test_lexical_near_duplicate_band():
    """A rule with a short suffix is a near-duplicate; an unrelated rule is not."""
    base_rule = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית"

    with_suffix = "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית, כך נפסק"
    is_duplicate = hq.lexical_near_duplicate(base_rule, with_suffix)
    assert is_duplicate is True

    unrelated_rule = "המועד להגשת ערר על שומה הוא שלושים ימים"
    is_duplicate = hq.lexical_near_duplicate(base_rule, unrelated_rule)
    assert is_duplicate is False