Compare commits
1 Commits
system-spe
...
fix/fu4-co
| Author | SHA1 | Date | |
|---|---|---|---|
| 1af689a969 |
@@ -3165,7 +3165,10 @@ async def search_precedent_library_semantic(
|
|||||||
of halacha review status.
|
of halacha review status.
|
||||||
"""
|
"""
|
||||||
pool = await get_pool()
|
pool = await get_pool()
|
||||||
halacha_filters = ["h.review_status IN ('approved', 'published')"]
|
halacha_filters = [
|
||||||
|
"h.review_status IN ('approved', 'published')",
|
||||||
|
f"cl.source_kind = '{source_kind}'",
|
||||||
|
]
|
||||||
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
||||||
h_params: list = [query_embedding, limit]
|
h_params: list = [query_embedding, limit]
|
||||||
c_params: list = [query_embedding, limit]
|
c_params: list = [query_embedding, limit]
|
||||||
@@ -3398,7 +3401,10 @@ async def search_precedent_library_lexical(
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
pool = await get_pool()
|
pool = await get_pool()
|
||||||
halacha_filters = ["h.review_status IN ('approved', 'published')"]
|
halacha_filters = [
|
||||||
|
"h.review_status IN ('approved', 'published')",
|
||||||
|
f"cl.source_kind = '{source_kind}'",
|
||||||
|
]
|
||||||
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
||||||
# $1 = query, $2 = limit. Filters append starting at $3.
|
# $1 = query, $2 = limit. Filters append starting at $3.
|
||||||
h_params: list = [query, limit]
|
h_params: list = [query, limit]
|
||||||
|
|||||||
97
mcp-server/tests/test_precedent_corpus_isolation.py
Normal file
97
mcp-server/tests/test_precedent_corpus_isolation.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
"""Regression test for GAP-10 / INV-RET1: corpus separation enforced on
EVERY precedent-library query path — including the halacha sub-query.

Bug: ``search_precedent_library_semantic`` and
``search_precedent_library_lexical`` filtered the *chunk* sub-query by
``cl.source_kind`` but NOT the *halacha* sub-query. So an external
(``source_kind='external_upload'``) search leaked internal-committee
halachot, and an internal search leaked external-ruling halachot — a
cross-corpus contamination of the rule-level results.

Fix: the same ``cl.source_kind = '<kind>'`` predicate that gates the
chunk query now also gates the halacha query, in BOTH functions.

This test runs fully OFFLINE — it monkeypatches ``db.get_pool`` with a
fake pool that captures every SQL string passed to ``fetch`` instead of
hitting Postgres. It asserts the captured halacha SQL carries the
source_kind predicate identical to the chunk SQL.
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from legal_mcp.services import db
|
||||||
|
|
||||||
|
|
||||||
|
class _FakePool:
    """Stand-in for the database pool that records SQL instead of running it.

    Each statement handed to ``fetch`` is appended to ``queries`` in call
    order and an empty result list is returned, so the code under test can
    run without a database connection.
    """

    def __init__(self) -> None:
        # SQL text of every ``fetch`` call, oldest first.
        self.queries: list[str] = []

    async def fetch(self, sql: str, *args) -> list:  # noqa: ANN002
        """Record ``sql`` and return no rows; bind parameters are ignored."""
        self.queries.append(sql)
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def _classify(queries: list[str]) -> tuple[str, str]:
    """Return ``(halacha_sql, chunk_sql)`` from the captured queries.

    The halacha query is the first captured statement containing
    ``FROM halachot h``; the chunk query is the first one containing
    ``FROM precedent_chunks pc``.

    Args:
        queries: SQL strings captured by the fake pool, in call order.

    Returns:
        A ``(halacha_sql, chunk_sql)`` tuple.

    Raises:
        AssertionError: If either query was never issued. Raised explicitly
            so the test fails with a readable message rather than an opaque
            ``StopIteration`` from an exhausted generator.
    """
    halacha = next((q for q in queries if "FROM halachot h" in q), None)
    chunk = next((q for q in queries if "FROM precedent_chunks pc" in q), None)
    if halacha is None:
        raise AssertionError(
            f"no halachot query was captured; got {len(queries)} queries"
        )
    if chunk is None:
        raise AssertionError(
            f"no precedent_chunks query was captured; got {len(queries)} queries"
        )
    return halacha, chunk
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool:
    """Replace ``db.get_pool`` with a coroutine that returns a ``_FakePool``.

    The same pool instance is handed to the test, so the SQL captured while
    the code under test runs can be inspected afterwards via ``queries``.
    The patch is applied through ``monkeypatch``.
    """
    pool = _FakePool()

    async def _get_pool() -> _FakePool:
        # Async wrapper because the code under test does ``await get_pool()``.
        return pool

    monkeypatch.setattr(db, "get_pool", _get_pool)
    return pool
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("source_kind", ["external_upload", "internal_committee"])
def test_semantic_halacha_query_is_source_kind_scoped(
    fake_pool: _FakePool, source_kind: str
) -> None:
    """Semantic search must scope the halacha query by ``source_kind``.

    Runs ``db.search_precedent_library_semantic`` against the fake pool and
    checks that the captured halacha SQL contains the same
    ``cl.source_kind = '<kind>'`` predicate as the captured chunk SQL.
    """
    asyncio.run(
        db.search_precedent_library_semantic(
            query_embedding=[0.0] * 8,
            source_kind=source_kind,
            include_halachot=True,
            limit=5,
        )
    )
    halacha_sql, chunk_sql = _classify(fake_pool.queries)
    predicate = f"cl.source_kind = '{source_kind}'"
    # Precondition: the chunk query already carried the filter before the fix.
    assert predicate in chunk_sql, "chunk query must be source_kind-scoped (precondition)"
    assert predicate in halacha_sql, (
        "halacha query MUST carry the same source_kind predicate as the "
        "chunk query — otherwise cross-corpus halacha leakage (GAP-10)"
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("source_kind", ["external_upload", "internal_committee"])
def test_lexical_halacha_query_is_source_kind_scoped(
    fake_pool: _FakePool, source_kind: str
) -> None:
    """Lexical search must scope the halacha query by ``source_kind``.

    Runs ``db.search_precedent_library_lexical`` against the fake pool and
    checks that the captured halacha SQL contains the same
    ``cl.source_kind = '<kind>'`` predicate as the captured chunk SQL.
    """
    asyncio.run(
        db.search_precedent_library_lexical(
            query="zoning setback",
            source_kind=source_kind,
            include_halachot=True,
            limit=5,
        )
    )
    halacha_sql, chunk_sql = _classify(fake_pool.queries)
    predicate = f"cl.source_kind = '{source_kind}'"
    # Precondition: the chunk query already carried the filter before the fix.
    assert predicate in chunk_sql, "chunk query must be source_kind-scoped (precondition)"
    assert predicate in halacha_sql, (
        "halacha query MUST carry the same source_kind predicate as the "
        "chunk query — otherwise cross-corpus halacha leakage (GAP-10)"
    )
|
||||||
Reference in New Issue
Block a user