# legal-ai/mcp-server/tests/test_panel_extract_selection.py

"""Phase B selection logic — cap-of-5 + dedup-frees-slot in _extract_via_panel (#152).

Drives the orchestrator in dry_run mode with panel_extract / corpus-dedup / chunk
selection monkeypatched, so the cap and the "linked-existing frees a slot" rule
are verified without LLM/DB.
"""
from __future__ import annotations

import asyncio
from uuid import uuid4

import pytest

from legal_mcp import config
from legal_mcp.services import halacha_extractor as he

# Random case id for this test session: returned as the "id" of the fake
# case-law row and passed to he._extract_via_panel as the case to process.
CID = uuid4()


def _cluster(rule, verdict, votes=3, score=0.9):
    """Build one synthetic panel cluster dict for the selection tests.

    Args:
        rule: Value stored under ``rule_statement``; also embedded in the
            ``supporting_quote`` as ``q:<rule>``.
        verdict: Value stored under ``verdict`` (the tests use "approved",
            "pending_review" and "rejected").
        votes: Vote count; also the number of model names kept in ``voters``.
        score: Value stored under ``score``.

    Returns:
        A fresh dict with a fixed ``rule_type`` of "interpretive", an empty
        ``reasoning_summary`` and a constant two-element ``embedding``.
    """
    panel_models = ["claude", "deepseek", "gemini"]
    quote = f"q:{rule}"
    cluster = {
        "rule_statement": rule,
        "supporting_quote": quote,
        "reasoning_summary": "",
        "rule_type": "interpretive",
        "votes": votes,
        "score": score,
        # Only the first `votes` models are listed as having voted.
        "voters": panel_models[:votes],
        "verdict": verdict,
        "embedding": [1.0, 0.0],
    }
    return cluster


def _patch_common(monkeypatch, clusters):
    """Replace the orchestrator's collaborators with in-memory fakes.

    Patches, via ``monkeypatch``:
      * ``he.db.get_case_law`` -> returns a fake case row whose ``id`` is CID.
      * ``he._select_extractable_chunks`` -> returns one dummy chunk.
      * ``he.panel_extraction.panel_extract`` -> returns ``clusters`` as-is.
      * ``he.db.nearest_canonical_halacha`` -> returns None for every call
        (individual tests may override this one afterwards).

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.
        clusters: List of cluster dicts the fake panel should return.
    """
    async def _stub_case(_id):
        # full_text is rebuilt on every call from the current clusters, using
        # the same "q:<rule>" shape that _cluster uses for supporting_quote.
        quotes = ["q:{}".format(entry["rule_statement"]) for entry in clusters]
        return {
            "id": CID,
            "source_kind": "external_upload",
            "is_binding": True,
            "full_text": " ".join(quotes),
        }

    async def _stub_chunks(_id):
        # NOTE(review): the meaning of the second tuple element (False) is
        # defined by _select_extractable_chunks — confirm against the service.
        chunk_rows = [{"content": "reasoning text"}]
        return (chunk_rows, False)

    async def _stub_panel(text, **kw):
        return clusters

    async def _stub_nearest(emb, threshold=0.85, status_filter=()):
        # Default: nothing is known, so every candidate counts as new.
        return None

    replacements = (
        (he.db, "get_case_law", _stub_case),
        (he, "_select_extractable_chunks", _stub_chunks),
        (he.panel_extraction, "panel_extract", _stub_panel),
        (he.db, "nearest_canonical_halacha", _stub_nearest),
    )
    for owner, attr_name, stub in replacements:
        monkeypatch.setattr(owner, attr_name, stub)


def _run(monkeypatch, clusters, nearest_fn=None):
    """Run ``he._extract_via_panel`` for CID in dry-run mode and return its result.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.
        clusters: Not used by this helper; kept so call sites read uniformly.
        nearest_fn: Optional async replacement for
            ``he.db.nearest_canonical_halacha``; installed only when truthy.

    Returns:
        Whatever ``he._extract_via_panel(CID, dry_run=True)`` returns.
    """
    use_override = bool(nearest_fn)
    if use_override:
        monkeypatch.setattr(he.db, "nearest_canonical_halacha", nearest_fn)

    pending = he._extract_via_panel(CID, dry_run=True)
    return asyncio.run(pending)


def test_drops_rejected_keeps_approved_and_pending(monkeypatch):
    """Approved and pending_review clusters reach ``to_store``; rejected ones do not."""
    approved = _cluster("A", "approved")
    pending = _cluster("B", "pending_review", votes=2, score=0.7)
    rejected = _cluster("C", "rejected", votes=1, score=0.9)
    clusters = [approved, pending, rejected]

    _patch_common(monkeypatch, clusters)
    outcome = _run(monkeypatch, clusters)

    stored_rules = []
    for item in outcome["to_store"]:
        stored_rules.append(item["rule_statement"])

    assert "A" in stored_rules
    assert "B" in stored_rules
    assert "C" not in stored_rules


def test_cap_limits_new_to_max(monkeypatch):
    """With the cap set to 3 and six new candidates, three are kept and three dropped."""
    monkeypatch.setattr(config, "HALACHA_PANEL_MAX_NEW", 3)

    clusters = []
    for idx in range(6):
        clusters.append(_cluster(f"R{idx}", "approved"))
    _patch_common(monkeypatch, clusters)

    async def nothing_known(emb, threshold=0.85, status_filter=()):
        # No corpus match for any candidate, so every one of them is new.
        return None

    outcome = _run(monkeypatch, clusters, nothing_known)

    assert outcome["new"] == 3
    assert outcome["dropped_over_cap"] == 3
    assert len(outcome["to_store"]) == 3


def test_linked_existing_does_not_consume_cap(monkeypatch):
    """Candidates that link to an existing entry do not count against the new-item cap."""
    monkeypatch.setattr(config, "HALACHA_PANEL_MAX_NEW", 2)

    # Five candidates: three that the fake lookup reports as known (linked),
    # followed by two that it reports as new.
    known_clusters = [_cluster(f"K{i}", "approved") for i in range(3)]
    new_clusters = [_cluster(f"N{i}", "approved") for i in range(2)]
    clusters = known_clusters + new_clusters
    _patch_common(monkeypatch, clusters)

    known = {"K0", "K1", "K2"}
    # The lookup receives only an embedding, so the fake identifies the
    # candidate by call order: one queue entry is consumed per call, mirroring
    # the order of `clusters`.
    remaining = [entry["rule_statement"] for entry in clusters]

    async def nearest(emb, threshold=0.85, status_filter=()):
        rule = remaining.pop(0)
        if rule in known:
            return ("canon", 0.99)
        return None

    outcome = _run(monkeypatch, clusters, nearest)

    # 3 linked (free) + 2 new (within cap) -> all 5 stored, nothing dropped.
    assert outcome["linked"] == 3
    assert outcome["new"] == 2
    assert outcome["dropped_over_cap"] == 0
    assert len(outcome["to_store"]) == 5