"""Domain-scope tests for search_decisions (GAP-12 / INV-RET1).

Policy under test (see CLAUDE.md + tools/search.py):

1. explicit practice_area -> used as-is, search runs;
2. case_number + case.practice_area set -> use case value, runs;
3. case_number + empty case.practice_area but derivable prefix (8xxx)
   -> derive domain, runs;
4. case_number present but UNDETERMINABLE (case.practice_area empty AND
   prefix not 1/8/9) -> BLOCK (return Hebrew error, hybrid search NEVER
   called);
5. no case_number, no practice_area -> warn + proceed (runs).

All DB / embedding / hybrid-search / telemetry calls are monkeypatched so the
test runs fully offline with no live Postgres or model.
"""

from __future__ import annotations

import asyncio
import json
from uuid import uuid4

import pytest

from legal_mcp.services import db, embeddings, hybrid_search, telemetry
from legal_mcp.tools import search as search_tool


def _run(coro):
    """Run the coroutine *coro* to completion via ``asyncio.run``."""
    return asyncio.run(coro)


def _assert_ok_with_hit(raw: str) -> None:
    """Assert *raw* is an ``ok`` JSON envelope carrying the synthetic hit.

    Parses the payload once and checks both the ``status`` field and the
    ``content`` of the first ``data`` item (the stubbed hybrid-search result).
    """
    envelope = json.loads(raw)
    assert envelope["status"] == "ok"
    assert envelope["data"][0]["content"] == "hit"


@pytest.fixture()
def patched(monkeypatch: pytest.MonkeyPatch) -> dict:
    """Patch all I/O boundaries and record what the stubs received.

    The returned dict has three keys:

    * ``calls["hybrid"]`` is appended to ONLY when the (stubbed) hybrid search
      runs, so asserting ``calls["hybrid"] == []`` proves the search was
      blocked.
    * ``calls["cases"]`` is seed data: tests fill it with
      ``case_number -> case row`` entries that the stubbed
      ``get_case_by_number`` hands back.
    * ``calls["lookups"]`` records every ``case_number`` the stubbed
      ``get_case_by_number`` was asked for, so asserting
      ``calls["lookups"] == []`` proves no case lookup happened.
    """
    calls: dict = {"hybrid": [], "cases": {}, "lookups": []}

    async def _embed_query(query: str):
        # fixed-size dummy vector; no embedding model is loaded
        return [0.0] * 8

    async def _search_documents_hybrid(**kwargs):
        calls["hybrid"].append(kwargs)
        # one synthetic hit so the formatting path is exercised
        return [
            {
                "score": 0.9,
                "case_number": "X",
                "document_title": "doc",
                "section_type": "facts",
                "page_number": 1,
                "content": "hit",
                "match_type": "text",
                "image_thumbnail_path": None,
            }
        ]

    async def _get_case_by_number(case_number: str):
        # record the request so tests can tell "no lookup" from "lookup missed"
        calls["lookups"].append(case_number)
        return calls["cases"].get(case_number)

    def _log_search_bg(**kwargs):
        return None

    monkeypatch.setattr(embeddings, "embed_query", _embed_query)
    monkeypatch.setattr(
        hybrid_search, "search_documents_hybrid", _search_documents_hybrid
    )
    monkeypatch.setattr(db, "get_case_by_number", _get_case_by_number)
    monkeypatch.setattr(telemetry, "log_search_bg", _log_search_bg)
    return calls


def test_explicit_practice_area_used(patched: dict) -> None:
    """Policy 1: an explicit practice_area is passed through to the search."""
    out = _run(
        search_tool.search_decisions(
            query="זכויות בנייה", practice_area="betterment_levy"
        )
    )
    assert len(patched["hybrid"]) == 1
    assert patched["hybrid"][0]["practice_area"] == "betterment_levy"
    # explicit value must not trigger a case lookup
    assert patched["lookups"] == []
    # ran -> JSON result, not an error string
    _assert_ok_with_hit(out)


def test_case_practice_area_used(patched: dict) -> None:
    """Policy 2: the case row's practice_area is used when it is set."""
    patched["cases"]["8126/25"] = {
        "id": str(uuid4()),
        "practice_area": "betterment_levy",
        "appeal_subtype": "betterment_levy",
    }
    out = _run(
        search_tool.search_decisions(query="היטל", case_number="8126/25")
    )
    assert len(patched["hybrid"]) == 1
    assert patched["hybrid"][0]["practice_area"] == "betterment_levy"
    _assert_ok_with_hit(out)


def test_case_empty_practice_area_derived_from_prefix(patched: dict) -> None:
    """Policy 3: an empty case practice_area is derived from the prefix."""
    # case row exists but practice_area is empty -> derive from 8xxx prefix
    patched["cases"]["8126/25"] = {
        "id": str(uuid4()),
        "practice_area": "",
        "appeal_subtype": "",
    }
    out = _run(
        search_tool.search_decisions(query="היטל", case_number="8126/25")
    )
    assert len(patched["hybrid"]) == 1
    assert patched["hybrid"][0]["practice_area"] == "betterment_levy"
    _assert_ok_with_hit(out)


def test_case_undeterminable_is_blocked(patched: dict) -> None:
    """Policy 4: an undeterminable domain blocks the search with an error."""
    # case exists, empty practice_area, and prefix is NOT 1/8/9 -> block
    patched["cases"]["7777/25"] = {
        "id": str(uuid4()),
        "practice_area": "",
        "appeal_subtype": "",
    }
    out = _run(
        search_tool.search_decisions(query="משהו", case_number="7777/25")
    )
    # hybrid search must NOT have been called
    assert patched["hybrid"] == []
    # GAP-48: returns the {status,data,message} envelope with status="error"
    parsed = json.loads(out)
    assert parsed["status"] == "error"
    assert "7777/25" in parsed["message"]


def test_no_case_no_practice_area_proceeds(patched: dict) -> None:
    """Policy 5: with neither case nor practice_area the search still runs."""
    # exploratory / chat search: cross-domain, must NOT be blocked
    out = _run(search_tool.search_decisions(query="חיפוש חופשי"))
    assert len(patched["hybrid"]) == 1
    assert patched["hybrid"][0]["practice_area"] is None
    # no case_number was given, so no case lookup should have been attempted
    assert patched["lookups"] == []
    _assert_ok_with_hit(out)