From 358d82e90e6421a699f88621ffc2b6c6d66e7724 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 30 May 2026 20:57:27 +0000 Subject: [PATCH] feat(retrieval): require practice_area only for internal/cases; enable searchable filter + health visibility (GAP-13, FU-2a) Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/src/legal_mcp/services/db.py | 14 ++++++++++---- mcp-server/src/legal_mcp/services/metrics.py | 4 ++++ mcp-server/tests/test_idempotent_ingest.py | 9 +++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index d2cfedc..632746e 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -2735,8 +2735,12 @@ def _compute_searchable(row: dict, has_embedded_chunk: bool) -> bool: return False if not (row.get("case_name") or "").strip(): return False - if not (row.get("practice_area") or "").strip(): - return False + # practice_area is required only for identifier-keyed corpora (internal + # committee decisions, active cases). External precedents (e.g. בג"ץ) are + # legitimately cross-domain and may have no single practice_area. + if (row.get("source_kind") or "") != "external_upload": + if not (row.get("practice_area") or "").strip(): + return False if not (row.get("source_kind") or "").strip(): return False tags = row.get("subject_tags") or [] @@ -3272,8 +3276,9 @@ async def search_precedent_library_semantic( halacha_filters = [ "h.review_status IN ('approved', 'published')", f"cl.source_kind = '{source_kind}'", + "cl.searchable = true", ] - chunk_filters = [f"cl.source_kind = '{source_kind}'"] + chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"] h_params: list = [query_embedding, limit] c_params: list = [query_embedding, limit] h_idx = 3 @@ -3508,8 +3513,9 @@ async def search_precedent_library_lexical( halacha_filters = [ "h.review_status IN ('approved', 'published')", f"cl.source_kind = '{source_kind}'", + "cl.searchable = true", ] - chunk_filters = [f"cl.source_kind = '{source_kind}'"] + chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"] # $1 = query, $2 = limit. Filters append starting at $3. h_params: list = [query, limit] c_params: list = [query, limit] diff --git a/mcp-server/src/legal_mcp/services/metrics.py b/mcp-server/src/legal_mcp/services/metrics.py index ce43dda..c32b4f5 100644 --- a/mcp-server/src/legal_mcp/services/metrics.py +++ b/mcp-server/src/legal_mcp/services/metrics.py @@ -123,6 +123,9 @@ async def get_dashboard() -> dict: total_corpus = await conn.fetchval("SELECT COUNT(*) FROM style_corpus") total_patterns = await conn.fetchval("SELECT COUNT(*) FROM style_patterns") total_case_law = await conn.fetchval("SELECT COUNT(*) FROM case_law") + non_searchable_case_law = await conn.fetchval( + "SELECT COUNT(*) FROM case_law WHERE NOT searchable" + ) # QA summary qa_total = await conn.fetchval("SELECT COUNT(DISTINCT case_id) FROM qa_results") @@ -154,6 +157,7 @@ async def get_dashboard() -> dict: "style_corpus": total_corpus, "style_patterns": total_patterns, "case_law_entries": total_case_law, + "non_searchable_case_law": non_searchable_case_law, }, "cases_by_status": cases_by_status, "qa": { diff --git a/mcp-server/tests/test_idempotent_ingest.py b/mcp-server/tests/test_idempotent_ingest.py index 04b5f08..a78b754 100644 --- a/mcp-server/tests/test_idempotent_ingest.py +++ b/mcp-server/tests/test_idempotent_ingest.py @@ -70,6 +70,15 @@ def test_compute_searchable_false_without_core_fields(): assert db._compute_searchable(row, has_embedded_chunk=True) is False +def test_compute_searchable_external_allows_empty_practice_area(): + # External precedents (e.g. בג"ץ) are cross-domain — empty practice_area + # must NOT disqualify them, as long as the rest of the contract holds. + row = _complete_row() + row["source_kind"] = "external_upload" + row["practice_area"] = "" + assert db._compute_searchable(row, has_embedded_chunk=True) is True + + # ── ingest wires in recompute_searchable (both types) ────────────────── def test_ingest_calls_recompute_searchable(monkeypatch, tmp_path): calls = {"recompute": [], "meta": [], "hal": []}