# legal-ai/mcp-server/tests/test_halacha_quality.py

from __future__ import annotations

import pytest

from legal_mcp.services import halacha_quality as hq


# ── non-decision / obiter ──

@pytest.mark.parametrize(
    "text",
    [
        "איני רואה לקבוע מסמרות בשאלה זו",
        "אין צורך להכריע בטענה זו",
        "למעלה מן הצורך נעיר כי",
        "הערה זו ניתנת אגב אורחא",
    ],
)
def test_detect_non_decision_hits(text: str) -> None:
    """Each sample phrase must be reported by ``detect_non_decision``."""
    marker = hq.detect_non_decision(text)
    assert marker is not None


@pytest.mark.parametrize(
    "text",
    [
        "בית המשפט קבע כי ההיתר בטל",
        "ועדת הערר מוסמכת לדון בטענת סטייה מתכנית",
        "",
    ],
)
def test_detect_non_decision_misses(text: str) -> None:
    """Plain holdings and the empty string must not be reported as non-decisions."""
    marker = hq.detect_non_decision(text)
    assert marker is None


def test_non_decision_scans_all_fields() -> None:
    """A marker that appears only in the third argument is still detected."""
    # The first two arguments are marker-free; only the last one (the quote)
    # contains the non-decision phrase.
    marker = hq.detect_non_decision("כלל כללי", "", "וכאן אין צורך להכריע")
    assert marker is not None


# ── truncated quote ──

def test_truncated_dangling_letter() -> None:
    """A quote that ends on a lone trailing letter is flagged as truncated."""
    dangling_quote = "ראוי כי תהיה השפעה על ה"
    verdict = hq.is_quote_truncated(dangling_quote)
    assert verdict is True


def test_truncated_empty() -> None:
    """A whitespace-only quote is treated as truncated."""
    verdict = hq.is_quote_truncated("   ")
    assert verdict is True


@pytest.mark.parametrize(
    "quote",
    [
        "ועדת הערר היא הגוף המקצועי האמון על בחינת ההיבטים התכנוניים.",
        # No terminating period, yet the final word is complete.
        "אין לועדה סמכות לסטות מתקנות התכנון והבניה",
        "ההיתר תואם את התכנית החלה על האיזור",
    ],
)
def test_not_truncated_complete_clauses(quote: str) -> None:
    """Quotes ending on a full word (with or without a period) are not truncated."""
    verdict = hq.is_quote_truncated(quote)
    assert verdict is False


# ── thin restatement ──

def test_thin_restatement_near_copy() -> None:
    """A rule that repeats its supporting quote verbatim is a thin restatement."""
    supporting_quote = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    rule_statement = "ביטול היתר מחייב טעמים כבדי משקל של אינטרס ציבורי"
    verdict = hq.is_thin_restatement(rule_statement, supporting_quote)
    assert verdict is True


def test_not_thin_when_abstracted() -> None:
    """A rule that is much longer and reworded relative to the quote is not thin."""
    supporting_quote = "אין חולק כי אין לועדה סמכות לסטות מתקנות"
    rule_statement = (
        "ועדה מקומית לתכנון ובניה אינה מוסמכת לסטות מהוראות תקנות התכנון "
        "והבניה, ובכלל זה מהוראות התוספת השנייה, ואין בידה ליתן היתר הסוטה מהן."
    )
    verdict = hq.is_thin_restatement(rule_statement, supporting_quote)
    assert verdict is False


def test_thin_handles_empty() -> None:
    """An empty rule statement is never reported as a thin restatement."""
    verdict = hq.is_thin_restatement("", "something")
    assert verdict is False


# ── aggregate flags + auto-approve gate semantics ──

def test_clean_halacha_no_flags() -> None:
    """A verified rule/quote pair with no issues yields an empty flag list."""
    rule_statement = (
        "ועדת הערר מוסמכת לדון בערר על החלטה ליתן היתר בנייה גם כאשר נטען "
        "כי ההיתר סוטה מתכנית, בהתאם למגמת תיקון 43 לחוק."
    )
    supporting_quote = (
        "פרשנות מרחיבה המאפשרת הגשת ערר גם במקרה של מתן היתר כאשר נטען כי "
        "ההיתר סוטה מתכנית הולמת את מגמת המחוקק בתיקון 43."
    )
    flags = hq.compute_quality_flags(
        rule_statement, supporting_quote, "", quote_verified=True
    )
    assert flags == []


def test_flags_accumulate() -> None:
    """Several independent problems on one item each contribute their own flag."""
    # The same text is used as rule and quote: it contains an obiter marker
    # and ends on a dangling letter, and the quote is marked unverified.
    raised = hq.compute_quality_flags(
        "כלל אגב אורחא על ה",
        "כלל אגב אורחא על ה",
        quote_verified=False,
    )
    assert hq.FLAG_NON_DECISION in raised
    assert hq.FLAG_TRUNCATED_QUOTE in raised
    assert hq.FLAG_QUOTE_UNVERIFIED in raised


def test_normalize_text_quote_variants() -> None:
    """ASCII quote + repeated spaces normalizes like gershayim + single space."""
    ascii_form = hq.normalize_text('עע"מ   317/10')
    gershayim_form = hq.normalize_text("עע״מ 317/10")
    assert ascii_form == gershayim_form


# ── #81.3 NLI entailment — pure prompt + parser ──

def test_build_nli_prompt_contains_pairs() -> None:
    """The NLI prompt includes the item texts and a numbered label per pair."""
    pairs = [
        {"rule_statement": "כלל אלף", "supporting_quote": "ציטוט אלף"},
        {"rule_statement": "כלל בית", "supporting_quote": "ציטוט בית"},
    ]
    prompt = hq.build_nli_prompt(pairs)
    # One rule from the first pair, one quote from the second pair.
    assert "כלל אלף" in prompt
    assert "ציטוט בית" in prompt
    # Pair labels are 1-based.
    assert "זוג 1" in prompt
    assert "זוג 2" in prompt


@pytest.mark.parametrize(
    "raw,n,expected",
    [
        (["entailed", "neutral"], 2, ["entailed", "neutral"]),
        # Labels are matched regardless of letter case.
        (["ENTAILED", "Contradiction"], 2, ["entailed", "contradiction"]),
        # Entries may be dicts carrying a "verdict" key.
        ([{"verdict": "neutral"}, {"verdict": "entailed"}], 2, ["neutral", "entailed"]),
        # Wrong number of entries: fail open to all-"entailed".
        (["entailed"], 2, ["entailed", "entailed"]),
        # Input is not a list: fail open to all-"entailed".
        (None, 2, ["entailed", "entailed"]),
        # Unrecognized label is replaced by "entailed".
        (["bananas", "neutral"], 2, ["entailed", "neutral"]),
    ],
)
def test_parse_nli_verdicts(raw: object, n: int, expected: list[str]) -> None:
    """``parse_nli_verdicts`` normalizes raw model output into ``n`` verdicts."""
    parsed = hq.parse_nli_verdicts(raw, n)
    assert parsed == expected


# ── _nli_check (async, via claude_session) — fail-open + verdict mapping ──

def test_nli_check_fail_open(monkeypatch) -> None:
    """If the underlying query raises, ``_nli_check`` still returns "entailed"."""
    import asyncio
    from legal_mcp.services import halacha_extractor as he

    async def _always_raises(*args, **kwargs):
        raise RuntimeError("no claude CLI here")

    monkeypatch.setattr(he.claude_session, "query_json", _always_raises)

    pairs = [{"rule_statement": "a", "supporting_quote": "b"}]
    verdicts = asyncio.run(he._nli_check(pairs))
    # The failure must not propagate or block the item.
    assert verdicts == ["entailed"]


def test_nli_check_maps_verdicts(monkeypatch) -> None:
    """Verdicts returned by the stubbed query come back one per input item."""
    import asyncio
    from legal_mcp.services import halacha_extractor as he

    async def _canned_reply(*args, **kwargs):
        return ["entailed", "neutral"]

    monkeypatch.setattr(he.claude_session, "query_json", _canned_reply)

    pairs = [
        {"rule_statement": "a", "supporting_quote": "b"},
        {"rule_statement": "c", "supporting_quote": "d"},
    ]
    verdicts = asyncio.run(he._nli_check(pairs))
    assert verdicts == ["entailed", "neutral"]


def test_nli_check_empty() -> None:
    """An empty item list produces an empty verdict list."""
    import asyncio
    from legal_mcp.services import halacha_extractor as he

    verdicts = asyncio.run(he._nli_check([]))
    assert verdicts == []


# ── #81.5 consolidation — pure prompt + fold-group parser ──

def test_build_consolidation_prompt() -> None:
    """The prompt lists each rule as ``[index] text`` and includes the reasoning."""
    candidates = [
        {"halacha_index": 3, "rule_statement": "כלל גימל", "reasoning_summary": "כי"},
        {"halacha_index": 7, "rule_statement": "כלל זין", "reasoning_summary": ""},
    ]
    prompt = hq.build_consolidation_prompt(candidates)
    assert "[3] כלל גימל" in prompt
    assert "[7] כלל זין" in prompt
    # The non-empty reasoning summary of the first item is rendered.
    assert "היגיון: כי" in prompt


@pytest.mark.parametrize(
    "raw,expected",
    [
        ([[2, 5, 9], [14, 18]], [[2, 5, 9], [14, 18]]),
        # A group with a single member is discarded.
        ([[2, 5], [7]], [[2, 5]]),
        # Numeric strings are converted to ints.
        ([["2", "5"]], [[2, 5]]),
        # Repeated indices inside a group collapse to one.
        ([[2, 2, 5]], [[2, 5]]),
        # Empty input: no groups.
        ([], []),
        # A non-list value yields no groups.
        ("garbage", []),
        # None yields no groups.
        (None, []),
        # A group left with fewer than two valid indices is discarded.
        ([[1, "x"], [3, 4]], [[3, 4]]),
    ],
)
def test_parse_fold_groups(raw: object, expected: list[list[int]]) -> None:
    """``parse_fold_groups`` sanitizes raw fold groups into lists of int indices."""
    groups = hq.parse_fold_groups(raw)
    assert groups == expected


def test_consolidation_priority_prefers_approved_then_confidence() -> None:
    """An approved row outranks a pending row that has higher confidence."""
    from legal_mcp.services import halacha_extractor as he

    approved_row = {
        "id": "a",
        "review_status": "approved",
        "confidence": 0.7,
        "quote_verified": True,
        "rule_statement": "x",
    }
    pending_row = {
        "id": "b",
        "review_status": "pending_review",
        "confidence": 0.95,
        "quote_verified": True,
        "rule_statement": "x",
    }
    # The row with the smallest priority key is the one kept as canonical.
    canonical = min([approved_row, pending_row], key=he._consolidation_priority)
    assert canonical["id"] == "a"