Compare commits
1 Commit
system-spe
...
fix/fu4-co
| Author | SHA1 | Date | |
|---|---|---|---|
| 1af689a969 |
@@ -3165,7 +3165,10 @@ async def search_precedent_library_semantic(
|
||||
of halacha review status.
|
||||
"""
|
||||
pool = await get_pool()
|
||||
halacha_filters = ["h.review_status IN ('approved', 'published')"]
|
||||
halacha_filters = [
|
||||
"h.review_status IN ('approved', 'published')",
|
||||
f"cl.source_kind = '{source_kind}'",
|
||||
]
|
||||
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
||||
h_params: list = [query_embedding, limit]
|
||||
c_params: list = [query_embedding, limit]
|
||||
@@ -3398,7 +3401,10 @@ async def search_precedent_library_lexical(
|
||||
return []
|
||||
|
||||
pool = await get_pool()
|
||||
halacha_filters = ["h.review_status IN ('approved', 'published')"]
|
||||
halacha_filters = [
|
||||
"h.review_status IN ('approved', 'published')",
|
||||
f"cl.source_kind = '{source_kind}'",
|
||||
]
|
||||
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
||||
# $1 = query, $2 = limit. Filters append starting at $3.
|
||||
h_params: list = [query, limit]
|
||||
|
||||
97
mcp-server/tests/test_precedent_corpus_isolation.py
Normal file
97
mcp-server/tests/test_precedent_corpus_isolation.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Regression test for GAP-10 / INV-RET1: corpus separation enforced on
|
||||
EVERY precedent-library query path — including the halacha sub-query.
|
||||
|
||||
Bug: ``search_precedent_library_semantic`` and
|
||||
``search_precedent_library_lexical`` filtered the *chunk* sub-query by
|
||||
``cl.source_kind`` but NOT the *halacha* sub-query. So an external
|
||||
(``source_kind='external_upload'``) search leaked internal-committee
|
||||
halachot, and an internal search leaked external-ruling halachot — a
|
||||
cross-corpus contamination of the rule-level results.
|
||||
|
||||
Fix: the same ``cl.source_kind = '<kind>'`` predicate that gates the
|
||||
chunk query now also gates the halacha query, in BOTH functions.
|
||||
|
||||
This test runs fully OFFLINE — it monkeypatches ``db.get_pool`` with a
|
||||
fake pool that captures every SQL string passed to ``fetch`` instead of
|
||||
hitting Postgres. It asserts the captured halacha SQL carries the
|
||||
source_kind predicate identical to the chunk SQL.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp.services import db
|
||||
|
||||
|
||||
class _FakePool:
    """Stand-in for the database pool that records SQL instead of running it.

    Each statement handed to ``fetch`` is stored in ``queries`` in call
    order. Bind parameters are accepted but ignored, and every call
    reports an empty result set.
    """

    def __init__(self) -> None:
        # SQL text of every ``fetch`` call so far, oldest first.
        self.queries: list[str] = []

    async def fetch(self, sql: str, *args) -> list:  # noqa: ANN002
        """Remember *sql* and answer with no rows."""
        no_rows: list = []
        self.queries.append(sql)
        return no_rows
|
||||
|
||||
|
||||
def _classify(queries: list[str]) -> tuple[str, str]:
    """Pick the halacha and chunk statements out of the captured SQL.

    Args:
        queries: SQL strings in the order they were captured.

    Returns:
        ``(halacha_sql, chunk_sql)``: the first captured statement that
        contains ``FROM halachot h`` and the first that contains
        ``FROM precedent_chunks pc``.

    Raises:
        AssertionError: if either statement was never captured. The
            message names the missing marker, so a test fails with a
            readable reason rather than a bare ``StopIteration``.
    """
    found: list[str] = []
    # Halacha first, chunk second: this is the order of the returned tuple.
    for marker in ("FROM halachot h", "FROM precedent_chunks pc"):
        sql = next((q for q in queries if marker in q), None)
        if sql is None:
            raise AssertionError(
                f"no captured query contains {marker!r} "
                f"({len(queries)} queries captured)"
            )
        found.append(sql)
    halacha_sql, chunk_sql = found
    return halacha_sql, chunk_sql
|
||||
|
||||
|
||||
@pytest.fixture()
def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool:
    """Replace ``db.get_pool`` with a coroutine that yields a capturing pool.

    The same ``_FakePool`` instance is both served to the code under test
    and returned to the test, so the test can inspect the recorded SQL.
    """
    capturing_pool = _FakePool()

    async def _fake_get_pool() -> _FakePool:
        return capturing_pool

    monkeypatch.setattr(db, "get_pool", _fake_get_pool)
    return capturing_pool
|
||||
|
||||
|
||||
@pytest.mark.parametrize("source_kind", ["external_upload", "internal_committee"])
def test_semantic_halacha_query_is_source_kind_scoped(
    fake_pool: _FakePool, source_kind: str
) -> None:
    """Semantic search must scope the halacha SQL by ``source_kind``.

    Runs the search against the capturing pool, then checks that the
    halacha statement carries the same predicate as the chunk statement.
    """
    search = db.search_precedent_library_semantic(
        query_embedding=[0.0] * 8,
        source_kind=source_kind,
        include_halachot=True,
        limit=5,
    )
    asyncio.run(search)

    halacha_sql, chunk_sql = _classify(fake_pool.queries)
    expected_filter = f"cl.source_kind = '{source_kind}'"

    # Precondition: the chunk query is the reference for the predicate.
    assert expected_filter in chunk_sql, "chunk query must be source_kind-scoped (precondition)"
    assert expected_filter in halacha_sql, (
        "halacha query MUST carry the same source_kind predicate as the "
        "chunk query — otherwise cross-corpus halacha leakage (GAP-10)"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("source_kind", ["external_upload", "internal_committee"])
def test_lexical_halacha_query_is_source_kind_scoped(
    fake_pool: _FakePool, source_kind: str
) -> None:
    """Lexical search must scope the halacha SQL by ``source_kind``.

    Runs the search against the capturing pool, then checks that the
    halacha statement carries the same predicate as the chunk statement.
    """
    search = db.search_precedent_library_lexical(
        query="zoning setback",
        source_kind=source_kind,
        include_halachot=True,
        limit=5,
    )
    asyncio.run(search)

    halacha_sql, chunk_sql = _classify(fake_pool.queries)
    expected_filter = f"cl.source_kind = '{source_kind}'"

    # Precondition: the chunk query is the reference for the predicate.
    assert expected_filter in chunk_sql, "chunk query must be source_kind-scoped (precondition)"
    assert expected_filter in halacha_sql, (
        "halacha query MUST carry the same source_kind predicate as the "
        "chunk query — otherwise cross-corpus halacha leakage (GAP-10)"
    )
|
||||
Reference in New Issue
Block a user