merge: GAP-12 — domain-scope search_decisions (INV-RET1)
Some checks failed
Build & Deploy / build-and-deploy (push) Has been cancelled
Some checks failed
Build & Deploy / build-and-deploy (push) Has been cancelled
Derive practice_area from case (case row → number-prefix fallback); block only when a case is present but undeterminable; case-less/exploratory search stays cross-domain. Verified offline (test_search_domain_scope.py 5/5). Closes PR #10.
This commit is contained in:
@@ -7,7 +7,7 @@ import logging
|
||||
import time
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db, embeddings, hybrid_search, telemetry
|
||||
from legal_mcp.services import db, embeddings, hybrid_search, practice_area as pa, telemetry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -30,7 +30,9 @@ async def search_decisions(
|
||||
appeal_subtype: סוג ערר לסינון (building_permit/betterment_levy/compensation_197)
|
||||
case_number: אם סופק, ה-practice_area/subtype יוסקו אוטומטית מהתיק
|
||||
"""
|
||||
# Auto-resolve practice_area from case_number if available
|
||||
# Auto-resolve practice_area from case_number if available (GAP-12 / INV-RET1):
|
||||
# explicit practice_area wins; otherwise derive from the case so the search is
|
||||
# scoped to the case's legal domain. Case-less search stays cross-domain.
|
||||
resolved_case_id: UUID | None = None
|
||||
if case_number and not practice_area:
|
||||
case = await db.get_case_by_number(case_number)
|
||||
@@ -42,6 +44,22 @@ async def search_decisions(
|
||||
except (KeyError, ValueError, TypeError):
|
||||
resolved_case_id = None
|
||||
|
||||
# Case row had no practice_area — fall back to deriving from the
|
||||
# case-number prefix (1xxx/8xxx/9xxx). Returns "" for unknown prefixes.
|
||||
if not practice_area:
|
||||
practice_area = pa.derive_domain_practice_area(case_number)
|
||||
|
||||
# Still undeterminable: a case is present but we cannot scope the
|
||||
# search to its domain. This is a data anomaly — BLOCK rather than
|
||||
# silently running a cross-domain search for a specific case.
|
||||
if not practice_area:
|
||||
return (
|
||||
f"שגיאה: לא ניתן לקבוע את התחום המשפטי (practice_area) של תיק "
|
||||
f"{case_number}. לתיק אין practice_area מוגדר ולא ניתן להסיק אותו "
|
||||
f"ממספר התיק. זוהי אנומליית נתונים — נא להגדיר את ה-practice_area "
|
||||
f"של התיק (למשל דרך case_update) לפני הרצת חיפוש מסונן לתיק זה."
|
||||
)
|
||||
|
||||
if not practice_area:
|
||||
logger.warning(
|
||||
"search_decisions called without practice_area filter — "
|
||||
|
||||
144
mcp-server/tests/test_search_domain_scope.py
Normal file
144
mcp-server/tests/test_search_domain_scope.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""Domain-scope tests for search_decisions (GAP-12 / INV-RET1).
|
||||
|
||||
Policy under test (see CLAUDE.md + tools/search.py):
|
||||
1. explicit practice_area -> used as-is, search runs;
|
||||
2. case_number + case.practice_area set -> use case value, runs;
|
||||
3. case_number + empty case.practice_area but derivable prefix (8xxx)
|
||||
-> derive domain, runs;
|
||||
4. case_number present but UNDETERMINABLE (case.practice_area empty AND
|
||||
prefix not 1/8/9) -> BLOCK (return Hebrew error, hybrid search
|
||||
NEVER called);
|
||||
5. no case_number, no practice_area -> warn + proceed (runs).
|
||||
|
||||
All DB / embedding / hybrid-search / telemetry calls are monkeypatched so
|
||||
the test runs fully offline with no live Postgres or model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp.services import db, embeddings, hybrid_search, telemetry
|
||||
from legal_mcp.tools import search as search_tool
|
||||
|
||||
|
||||
def _run(coro):
|
||||
return asyncio.run(coro)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def patched(monkeypatch: pytest.MonkeyPatch) -> dict:
|
||||
"""Patch all I/O boundaries. Record what hybrid_search received.
|
||||
|
||||
``calls["hybrid"]`` is appended to ONLY when the real search runs, so
|
||||
asserting ``calls["hybrid"] == []`` proves the search was blocked.
|
||||
"""
|
||||
calls: dict = {"hybrid": [], "cases": {}}
|
||||
|
||||
async def _embed_query(query: str):
|
||||
return [0.0] * 8
|
||||
|
||||
async def _search_documents_hybrid(**kwargs):
|
||||
calls["hybrid"].append(kwargs)
|
||||
# one synthetic hit so the formatting path is exercised
|
||||
return [
|
||||
{
|
||||
"score": 0.9,
|
||||
"case_number": "X",
|
||||
"document_title": "doc",
|
||||
"section_type": "facts",
|
||||
"page_number": 1,
|
||||
"content": "hit",
|
||||
"match_type": "text",
|
||||
"image_thumbnail_path": None,
|
||||
}
|
||||
]
|
||||
|
||||
async def _get_case_by_number(case_number: str):
|
||||
return calls["cases"].get(case_number)
|
||||
|
||||
def _log_search_bg(**kwargs):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr(embeddings, "embed_query", _embed_query)
|
||||
monkeypatch.setattr(
|
||||
hybrid_search, "search_documents_hybrid", _search_documents_hybrid
|
||||
)
|
||||
monkeypatch.setattr(db, "get_case_by_number", _get_case_by_number)
|
||||
monkeypatch.setattr(telemetry, "log_search_bg", _log_search_bg)
|
||||
return calls
|
||||
|
||||
|
||||
def test_explicit_practice_area_used(patched: dict) -> None:
|
||||
out = _run(
|
||||
search_tool.search_decisions(
|
||||
query="זכויות בנייה", practice_area="betterment_levy"
|
||||
)
|
||||
)
|
||||
assert len(patched["hybrid"]) == 1
|
||||
assert patched["hybrid"][0]["practice_area"] == "betterment_levy"
|
||||
# explicit value must not trigger a case lookup
|
||||
assert patched["cases"] == {}
|
||||
# ran -> JSON result, not an error string
|
||||
assert json.loads(out)[0]["content"] == "hit"
|
||||
|
||||
|
||||
def test_case_practice_area_used(patched: dict) -> None:
|
||||
patched["cases"]["8126/25"] = {
|
||||
"id": str(uuid4()),
|
||||
"practice_area": "betterment_levy",
|
||||
"appeal_subtype": "betterment_levy",
|
||||
}
|
||||
out = _run(
|
||||
search_tool.search_decisions(query="היטל", case_number="8126/25")
|
||||
)
|
||||
assert len(patched["hybrid"]) == 1
|
||||
assert patched["hybrid"][0]["practice_area"] == "betterment_levy"
|
||||
assert json.loads(out)[0]["content"] == "hit"
|
||||
|
||||
|
||||
def test_case_empty_practice_area_derived_from_prefix(patched: dict) -> None:
|
||||
# case row exists but practice_area is empty -> derive from 8xxx prefix
|
||||
patched["cases"]["8126/25"] = {
|
||||
"id": str(uuid4()),
|
||||
"practice_area": "",
|
||||
"appeal_subtype": "",
|
||||
}
|
||||
out = _run(
|
||||
search_tool.search_decisions(query="היטל", case_number="8126/25")
|
||||
)
|
||||
assert len(patched["hybrid"]) == 1
|
||||
assert patched["hybrid"][0]["practice_area"] == "betterment_levy"
|
||||
assert json.loads(out)[0]["content"] == "hit"
|
||||
|
||||
|
||||
def test_case_undeterminable_is_blocked(patched: dict) -> None:
|
||||
# case exists, empty practice_area, and prefix is NOT 1/8/9 -> block
|
||||
patched["cases"]["7777/25"] = {
|
||||
"id": str(uuid4()),
|
||||
"practice_area": "",
|
||||
"appeal_subtype": "",
|
||||
}
|
||||
out = _run(
|
||||
search_tool.search_decisions(query="משהו", case_number="7777/25")
|
||||
)
|
||||
# hybrid search must NOT have been called
|
||||
assert patched["hybrid"] == []
|
||||
# returns a Hebrew error string, not JSON
|
||||
assert out.startswith("שגיאה")
|
||||
assert "7777/25" in out
|
||||
with pytest.raises(json.JSONDecodeError):
|
||||
json.loads(out)
|
||||
|
||||
|
||||
def test_no_case_no_practice_area_proceeds(patched: dict) -> None:
|
||||
# exploratory / chat search: cross-domain, must NOT be blocked
|
||||
out = _run(search_tool.search_decisions(query="חיפוש חופשי"))
|
||||
assert len(patched["hybrid"]) == 1
|
||||
assert patched["hybrid"][0]["practice_area"] is None
|
||||
assert patched["cases"] == {}
|
||||
assert json.loads(out)[0]["content"] == "hit"
|
||||
Reference in New Issue
Block a user