feat(retrieval): require practice_area only for internal/cases; enable searchable filter + health visibility (GAP-13, FU-2a)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 20:57:27 +00:00
parent 6dbcb7e798
commit 358d82e90e
3 changed files with 23 additions and 4 deletions

View File

@@ -2735,8 +2735,12 @@ def _compute_searchable(row: dict, has_embedded_chunk: bool) -> bool:
return False
if not (row.get("case_name") or "").strip():
return False
if not (row.get("practice_area") or "").strip():
return False
# practice_area is required for every source_kind except "external_upload",
# i.e. the identifier-keyed corpora (internal committee decisions, active
# cases). External precedents (e.g. בג"ץ, Israel's High Court of Justice) are
# legitimately cross-domain and may have no single practice_area.
if (row.get("source_kind") or "") != "external_upload":
if not (row.get("practice_area") or "").strip():
return False
if not (row.get("source_kind") or "").strip():
return False
tags = row.get("subject_tags") or []
@@ -3272,8 +3276,9 @@ async def search_precedent_library_semantic(
halacha_filters = [
"h.review_status IN ('approved', 'published')",
f"cl.source_kind = '{source_kind}'",
"cl.searchable = true",
]
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"]
h_params: list = [query_embedding, limit]
c_params: list = [query_embedding, limit]
h_idx = 3
@@ -3508,8 +3513,9 @@ async def search_precedent_library_lexical(
halacha_filters = [
"h.review_status IN ('approved', 'published')",
f"cl.source_kind = '{source_kind}'",
"cl.searchable = true",
]
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"]
# $1 = query, $2 = limit. Filters append starting at $3.
h_params: list = [query, limit]
c_params: list = [query, limit]

View File

@@ -123,6 +123,9 @@ async def get_dashboard() -> dict:
total_corpus = await conn.fetchval("SELECT COUNT(*) FROM style_corpus")
total_patterns = await conn.fetchval("SELECT COUNT(*) FROM style_patterns")
total_case_law = await conn.fetchval("SELECT COUNT(*) FROM case_law")
non_searchable_case_law = await conn.fetchval(
"SELECT COUNT(*) FROM case_law WHERE NOT searchable"
)
# QA summary
qa_total = await conn.fetchval("SELECT COUNT(DISTINCT case_id) FROM qa_results")
@@ -154,6 +157,7 @@ async def get_dashboard() -> dict:
"style_corpus": total_corpus,
"style_patterns": total_patterns,
"case_law_entries": total_case_law,
"non_searchable_case_law": non_searchable_case_law,
},
"cases_by_status": cases_by_status,
"qa": {

View File

@@ -70,6 +70,15 @@ def test_compute_searchable_false_without_core_fields():
assert db._compute_searchable(row, has_embedded_chunk=True) is False
def test_compute_searchable_external_allows_empty_practice_area():
    """An external upload with a blank practice_area must stay searchable."""
    # External precedents (e.g. rulings of Israel's High Court of Justice,
    # "Bagatz") span several domains, so a blank practice_area alone must not
    # disqualify them when every other field of the contract is satisfied.
    external_row = _complete_row()
    external_row["practice_area"] = ""
    external_row["source_kind"] = "external_upload"

    is_searchable = db._compute_searchable(
        external_row, has_embedded_chunk=True
    )

    assert is_searchable is True
# ── ingest wires in recompute_searchable (both types) ──────────────────
def test_ingest_calls_recompute_searchable(monkeypatch, tmp_path):
calls = {"recompute": [], "meta": [], "hal": []}