"""Unit tests for the tri-model panel extraction core (#152, Phase A). Pure logic only — classify (the chair's approval rule), _coerce_list (judge-reply normalisation), and cluster_candidates (cross-model matching/voting) with injected embeddings. No LLM, no Voyage, no DB. """ from __future__ import annotations import pytest from legal_mcp import config from legal_mcp.services import panel_extraction as pe # ── classify — chaim's rule ──────────────────────────────────────── def test_classify_three_votes_approves_regardless_of_score(): assert pe.classify(3, 0.10) == "approved" assert pe.classify(3, 0.99) == "approved" def test_classify_two_votes_gated_by_floor(): floor = config.HALACHA_PANEL_SCORE_FLOOR assert pe.classify(2, floor) == "approved" assert pe.classify(2, floor + 0.05) == "approved" assert pe.classify(2, floor - 0.01) == "pending_review" def test_classify_one_or_zero_votes_rejected(): assert pe.classify(1, 0.99) == "rejected" assert pe.classify(0, 0.99) == "rejected" # ── _coerce_list — judge reply normalisation ─────────────────────── def test_coerce_list_accepts_bare_list(): raw = [{"rule_statement": "כלל", "supporting_quote": "ציטוט", "score": 0.9}] out = pe._coerce_list(raw) assert len(out) == 1 and out[0]["rule_type"] == "interpretive" def test_coerce_list_unwraps_dict_wrapper_and_drops_incomplete(): raw = {"principles": [ {"rule_statement": "כלל", "supporting_quote": "ציטוט", "rule_type": "holding", "score": 1.5}, {"rule_statement": "", "supporting_quote": "ציטוט"}, # no rule → drop {"rule_statement": "כלל2", "supporting_quote": ""}, # no quote → drop ]} out = pe._coerce_list(raw) assert len(out) == 1 assert out[0]["rule_type"] == "holding" assert out[0]["score"] == 1.0 # clamped to [0,1] def test_coerce_list_bad_rule_type_falls_back(): out = pe._coerce_list([{"rule_statement": "כלל", "supporting_quote": "צ", "rule_type": "obiter", "score": 0.5}]) assert out[0]["rule_type"] == "interpretive" def test_coerce_list_junk_returns_empty(): assert pe._coerce_list("nonsense") == [] assert pe._coerce_list(None) == [] # ── cluster_candidates — cross-model matching & voting ───────────── def _c(rule, score): return {"rule_statement": rule, "supporting_quote": "q", "reasoning_summary": "", "rule_type": "interpretive", "score": score} def test_cluster_merges_across_models_counts_votes_and_means_score(): # same principle proposed by all three (identical embedding) → 1 cluster, 3 votes a, b, c = _c("X", 0.9), _c("X", 0.8), _c("X", 0.7) per_model = {"claude": [a], "deepseek": [b], "gemini": [c]} embs = {id(a): [1.0, 0.0], id(b): [1.0, 0.0], id(c): [1.0, 0.0]} out = pe.cluster_candidates(per_model, embs) assert len(out) == 1 cl = out[0] assert cl["votes"] == 3 assert cl["score"] == pytest.approx((0.9 + 0.8 + 0.7) / 3, abs=1e-3) assert cl["verdict"] == "approved" assert cl["voters"] == ["claude", "deepseek", "gemini"] def test_cluster_separates_distinct_principles(): a, b = _c("X", 0.9), _c("Y", 0.9) per_model = {"claude": [a, b]} embs = {id(a): [1.0, 0.0], id(b): [0.0, 1.0]} # orthogonal → 2 clusters out = pe.cluster_candidates(per_model, embs) assert len(out) == 2 assert all(cl["votes"] == 1 and cl["verdict"] == "rejected" for cl in out) def test_cluster_same_model_twice_counts_one_vote_keeps_best_score(): # one model proposes two near-dupes; another proposes the same → 2 votes, not 3 a1, a2 = _c("X", 0.6), _c("X", 0.95) b = _c("X", 0.88) per_model = {"claude": [a1, a2], "deepseek": [b]} embs = {id(a1): [1.0, 0.0], id(a2): [1.0, 0.0], id(b): [1.0, 0.0]} out = pe.cluster_candidates(per_model, embs) assert len(out) == 1 cl = out[0] assert cl["votes"] == 2 # claude counts once # claude's best (0.95) and deepseek (0.88) → mean assert cl["score"] == pytest.approx((0.95 + 0.88) / 2, abs=1e-3) assert cl["rule_statement"] == "X" def test_cluster_sorted_strongest_first(): a = _c("X", 0.9) # 1 vote b, c = _c("Y", 0.9), _c("Y", 0.9) # 2 votes per_model = {"claude": [a, b], "deepseek": [c]} embs = {id(a): [1.0, 0.0], id(b): [0.0, 1.0], id(c): [0.0, 1.0]} out = pe.cluster_candidates(per_model, embs) assert out[0]["rule_statement"] == "Y" and out[0]["votes"] == 2 assert out[1]["rule_statement"] == "X" and out[1]["votes"] == 1