"""Unit tests for canonical_statement synthesis (V41 Phase 4) — INV-LRN6 / INV-AH.

Pure-helper coverage + the grounding/drift/citation gates of
synthesize_canonical, with db / claude_session / embeddings monkeypatched
(no DB, no LLM, no Voyage).
"""
from __future__ import annotations

import asyncio
from uuid import uuid4

import pytest

from legal_mcp.services import canonical_synthesis as cs

# Single canonical id shared by the fixture payload and every synthesize call.
CID = uuid4()


# ── pure helpers ───────────────────────────────────────────────────


def test_cosine_identity_and_orthogonal():
    """_cosine gives 1 for identical vectors, 0 for orthogonal and zero-norm ones."""
    assert cs._cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
    assert cs._cosine([1.0, 0.0], [0.0, 1.0]) == pytest.approx(0.0)
    assert cs._cosine([0.0, 0.0], [1.0, 1.0]) == 0.0  # zero-norm guard


def test_new_citations_flags_invented_docket_only():
    """_new_citations reports only docket numbers that are absent from the source."""
    src = 'העיקרון מתוך ערר 1234/05 והלכה נוספת'
    # statute section is fine; shared docket is fine; new docket flagged
    out = 'לפי סעיף 197 לחוק, וכפי שנקבע בערר 1234/05 ובעע"מ 9999/21'
    assert cs._new_citations(out, src) == ['9999/21']
    assert cs._new_citations('סעיף 197 לחוק התכנון והבניה', src) == []


def _data(*, statement="עיקרון מקורי נקי", instances=None, embedding=None):
    """Build the payload that the patched fetch_canonical_synthesis_input returns.

    Args:
        statement: value stored under ``canonical_statement``.
        instances: instance dicts to use; ``None`` selects one default
            "original" instance, while an explicit ``[]`` is kept empty.
        embedding: value stored under ``embedding`` (the source vector).

    Returns:
        A dict whose ``instance_count`` always equals ``len(instances)``.
    """
    if instances is None:
        instances = [
            {
                "instance_type": "original",
                "treatment": "mentioned",
                "rule_statement": "עיקרון מקורי נקי",
                "supporting_quote": "ציטוט תומך מהפסיקה",
                "reasoning_summary": "",
                "case_number": "1234-01-20",
                "case_name": "פלוני",
            },
        ]
    return {
        "id": str(CID),
        "canonical_statement": statement,
        "practice_areas": [],
        "subject_tags": [],
        "review_status": "pending_synthesis",
        # Derived from the resolved list so an explicit [] yields 0, not 1.
        "instance_count": len(instances),
        "embedding": embedding,
        "instances": instances,
    }


def _patch(monkeypatch, *, data, llm, emb=None):
    """Replace the db / claude_session / embeddings calls on ``cs`` with fakes.

    Args:
        monkeypatch: pytest's monkeypatch fixture.
        data: value returned by the fake ``fetch_canonical_synthesis_input``.
        llm: value returned by the fake ``query_json``.
        emb: optional callable producing one vector per text; it is called
            with a one-element list ``[text]``. When falsy, every text
            embeds to ``[1.0, 0.0]``.
    """

    async def fake_fetch(_cid):
        return data

    async def fake_query(*a, **k):
        return llm

    async def fake_embed(texts, input_type="document"):
        # default: proposed embeds identical to a [1,0] source → drift 1.0
        return [emb([t]) if emb else [1.0, 0.0] for t in texts]

    monkeypatch.setattr(cs.db, "fetch_canonical_synthesis_input", fake_fetch)
    monkeypatch.setattr(cs.claude_session, "query_json", fake_query)
    monkeypatch.setattr(cs.embeddings, "embed_texts", fake_embed)


def _run(monkeypatch, **kw):
    """Run ``cs.synthesize_canonical(CID, **kw)`` to completion and return its result.

    ``monkeypatch`` is not used in the body; it stays in the signature so
    existing call sites keep working.
    """
    return asyncio.run(cs.synthesize_canonical(CID, **kw))


# ── gate behaviour ─────────────────────────────────────────────────


def test_accepted_when_grounded_and_low_drift(monkeypatch):
    """A grounded, changed proposal embedding identically to the source is accepted."""
    _patch(
        monkeypatch,
        data=_data(embedding=[1.0, 0.0]),
        llm={
            "canonical_statement": "עיקרון מזוקק כללי",
            "grounded": True,
            "changed": True,
            "reason": "זוקק",
        },
    )
    res = _run(monkeypatch)
    assert res["status"] == "accepted"
    assert res["accepted"] is True
    assert res["proposed"] == "עיקרון מזוקק כללי"
    assert res["embedding"] == [1.0, 0.0]
    assert res["drift_cosine"] == pytest.approx(1.0)


def test_abstained_when_not_grounded(monkeypatch):
    """An LLM reply with ``grounded: False`` yields an abstention."""
    _patch(
        monkeypatch,
        data=_data(),
        llm={"canonical_statement": "x", "grounded": False, "reason": "אין עיגון"},
    )
    res = _run(monkeypatch)
    assert res["status"] == "abstained"
    assert res["accepted"] is False
    assert res["proposed"] == res["original"]  # original kept


def test_abstained_when_no_change(monkeypatch):
    """A proposal identical to the current statement yields an abstention."""
    _patch(
        monkeypatch,
        data=_data(statement="זהה"),
        llm={"canonical_statement": "זהה", "grounded": True},
    )
    assert _run(monkeypatch)["status"] == "abstained"


def test_drift_rejected_keeps_original(monkeypatch):
    """A proposal whose embedding falls below ``drift_floor`` is rejected."""
    # source [1,0], proposed embeds to [0,1] → cosine 0 < floor
    _patch(
        monkeypatch,
        data=_data(embedding=[1.0, 0.0]),
        llm={"canonical_statement": "עיקרון אחר לגמרי", "grounded": True},
        emb=lambda t: [0.0, 1.0],
    )
    res = _run(monkeypatch, drift_floor=0.80)
    assert res["status"] == "drift_rejected"
    assert res["accepted"] is False
    assert res["drift_cosine"] == pytest.approx(0.0)
    assert res["proposed"] == "עיקרון אחר לגמרי"  # surfaced for audit, not committed


def test_new_citation_rejected(monkeypatch):
    """A proposal that introduces a docket number not in the source is rejected."""
    _patch(
        monkeypatch,
        data=_data(embedding=[1.0, 0.0]),
        llm={"canonical_statement": 'עיקרון עם ציטוט חדש עע"מ 8888/22', "grounded": True},
    )
    res = _run(monkeypatch)
    assert res["status"] == "new_citation"
    assert res["accepted"] is False


def test_no_instances(monkeypatch):
    """An empty ``instances`` list yields status ``no_instances``."""
    _patch(
        monkeypatch,
        data=_data(instances=[]),
        llm={"canonical_statement": "x", "grounded": True},
    )
    assert _run(monkeypatch)["status"] == "no_instances"


def test_llm_error_on_none(monkeypatch):
    """A ``None`` reply from the LLM query yields status ``llm_error``."""
    _patch(monkeypatch, data=_data(), llm=None)
    assert _run(monkeypatch)["status"] == "llm_error"


def test_not_found(monkeypatch):
    """A fetch that returns ``None`` yields status ``not_found``."""

    async def none_fetch(_cid):
        return None

    # Only the fetch is patched: the test expects the function to return
    # before any LLM or embedding call is reached.
    monkeypatch.setattr(cs.db, "fetch_canonical_synthesis_input", none_fetch)
    assert _run(monkeypatch)["status"] == "not_found"