feat(principles): decision-level panel extraction regime — cap-5 + dedup-frees-slot (Phase B, #152)
extract() routes to _extract_via_panel when HALACHA_PANEL_REGIME_ENABLED: the 3-model panel proposes → votes/score → approval rule → dedup vs corpus (known links as citation, frees a cap slot) → cap HALACHA_PANEL_MAX_NEW genuinely-new principles/decision (by score), rest dropped. Replaces single-model auto-approve; legacy path kept as <2-judge fallback. db.store_panel_principles persists the pre-decided verdict + source-aware canonical create/link (G9 reviewer=panel:...). Dry-run validated on 29468-08-23: ~18 → 4 principles. 6 new tests; full suite 422 green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
92
mcp-server/tests/test_panel_extract_selection.py
Normal file
92
mcp-server/tests/test_panel_extract_selection.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Phase B selection logic — cap-of-5 + dedup-frees-slot in _extract_via_panel (#152).
|
||||
|
||||
Drives the orchestrator in dry_run mode with panel_extract / corpus-dedup / chunk
selection monkeypatched, so the cap and the "linked-existing frees a slot" rule
are verified without LLM/DB.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import halacha_extractor as he
|
||||
|
||||
CID = uuid4()
|
||||
|
||||
|
||||
def _cluster(rule, verdict, votes=3, score=0.9):
|
||||
return {"rule_statement": rule, "supporting_quote": f"q:{rule}",
|
||||
"reasoning_summary": "", "rule_type": "interpretive",
|
||||
"votes": votes, "score": score, "voters": ["claude", "deepseek", "gemini"][:votes],
|
||||
"verdict": verdict, "embedding": [1.0, 0.0]}
|
||||
|
||||
|
||||
def _patch_common(monkeypatch, clusters):
    """Replace the extractor's external collaborators with in-memory fakes.

    Patches four attributes so ``he._extract_via_panel`` can run without an
    LLM or a database:

    * ``he.db.get_case_law`` -> a case record whose ``full_text`` contains
      every cluster's ``q:<rule>`` quote.
    * ``he._select_extractable_chunks`` -> a single fixed chunk.
    * ``he.panel_extraction.panel_extract`` -> the given ``clusters``.
    * ``he.db.nearest_canonical_halacha`` -> always ``None`` (no corpus match).

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.
        clusters: Cluster dicts the fake panel should return.
    """

    async def fake_case(_id):
        # full_text is built at call time from the quotes of all clusters.
        quotes = (f"q:{c['rule_statement']}" for c in clusters)
        return {
            "id": CID,
            "source_kind": "external_upload",
            "is_binding": True,
            "full_text": " ".join(quotes),
        }

    async def fake_chunks(_id):
        # NOTE(review): meaning of the second tuple element is not visible here.
        return ([{"content": "reasoning text"}], False)

    async def fake_panel(text, **kw):
        return clusters

    async def none_match(emb, threshold=0.85, status_filter=()):
        return None  # default: nothing known → all new (tests override per-case)

    monkeypatch.setattr(he.db, "get_case_law", fake_case)
    monkeypatch.setattr(he, "_select_extractable_chunks", fake_chunks)
    monkeypatch.setattr(he.panel_extraction, "panel_extract", fake_panel)
    monkeypatch.setattr(he.db, "nearest_canonical_halacha", none_match)
|
||||
|
||||
|
||||
def _run(monkeypatch, clusters, nearest_fn=None):
    """Run ``he._extract_via_panel`` for ``CID`` in dry-run mode and return its result.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.
        clusters: Unused here; kept so call sites read uniformly.
        nearest_fn: Optional replacement for ``he.db.nearest_canonical_halacha``;
            when ``None`` the currently installed lookup is left in place.

    Returns:
        Whatever ``he._extract_via_panel(CID, dry_run=True)`` returns.
    """
    # Explicit None check: only "no override given" should skip the patch.
    if nearest_fn is not None:
        monkeypatch.setattr(he.db, "nearest_canonical_halacha", nearest_fn)
    return asyncio.run(he._extract_via_panel(CID, dry_run=True))
|
||||
|
||||
|
||||
def test_drops_rejected_keeps_approved_and_pending(monkeypatch):
    """Rejected clusters are absent from ``to_store``; approved and pending ones remain."""
    clusters = [
        _cluster("A", "approved"),
        _cluster("B", "pending_review", votes=2, score=0.7),
        _cluster("C", "rejected", votes=1, score=0.9),
    ]
    _patch_common(monkeypatch, clusters)

    res = _run(monkeypatch, clusters)

    rules = [p["rule_statement"] for p in res["to_store"]]
    # One assert per expectation so a failure names the offending rule.
    assert "A" in rules
    assert "B" in rules
    assert "C" not in rules
|
||||
|
||||
|
||||
def test_cap_limits_new_to_max(monkeypatch):
    """With the cap set to 3 and six new candidates, three are kept and three dropped."""
    monkeypatch.setattr(config, "HALACHA_PANEL_MAX_NEW", 3)
    clusters = [_cluster(f"R{i}", "approved") for i in range(6)]
    # _patch_common installs a corpus lookup that never matches, so every
    # candidate counts as new; no per-test override is needed.
    _patch_common(monkeypatch, clusters)

    res = _run(monkeypatch, clusters)

    assert res["new"] == 3
    assert res["dropped_over_cap"] == 3
    assert len(res["to_store"]) == 3
|
||||
|
||||
|
||||
def test_linked_existing_does_not_consume_cap(monkeypatch):
    """Candidates linked to a known canonical entry do not count against the cap."""
    monkeypatch.setattr(config, "HALACHA_PANEL_MAX_NEW", 2)
    # 5 candidates; the first 3 are "known" (link), last 2 are new
    known_clusters = [_cluster(f"K{i}", "approved") for i in range(3)]
    new_clusters = [_cluster(f"N{i}", "approved") for i in range(2)]
    clusters = known_clusters + new_clusters
    _patch_common(monkeypatch, clusters)
    known = {"K0", "K1", "K2"}

    # Queue mirroring the cluster order. All clusters share one embedding, so
    # the fake identifies the candidate by call position rather than by `emb`.
    # Assumes the orchestrator calls the lookup once per candidate, in order.
    pending = [c["rule_statement"] for c in clusters]

    async def nearest(emb, threshold=0.85, status_filter=()):
        rule = pending.pop(0)
        return ("canon", 0.99) if rule in known else None

    res = _run(monkeypatch, clusters, nearest)

    # 3 linked (free) + 2 new (within cap) → all 5 stored, nothing dropped
    assert res["linked"] == 3
    assert res["new"] == 2
    assert res["dropped_over_cap"] == 0
    assert len(res["to_store"]) == 5
|
||||
Reference in New Issue
Block a user