feat(retrieval): require practice_area only for internal/cases; enable searchable filter + health visibility (GAP-13, FU-2a)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 20:57:27 +00:00
parent 6dbcb7e798
commit 358d82e90e
3 changed files with 23 additions and 4 deletions

View File

@@ -2735,8 +2735,12 @@ def _compute_searchable(row: dict, has_embedded_chunk: bool) -> bool:
return False
if not (row.get("case_name") or "").strip():
return False
if not (row.get("practice_area") or "").strip():
return False
# practice_area is required only for identifier-keyed corpora (internal
# committee decisions, active cases). External precedents (source_kind ==
# "external_upload", e.g. בג"ץ — the Israeli High Court of Justice) are
# legitimately cross-domain and may have no single practice_area.
if (row.get("source_kind") or "") != "external_upload":
if not (row.get("practice_area") or "").strip():
return False
if not (row.get("source_kind") or "").strip():
return False
tags = row.get("subject_tags") or []
@@ -3272,8 +3276,9 @@ async def search_precedent_library_semantic(
halacha_filters = [
"h.review_status IN ('approved', 'published')",
f"cl.source_kind = '{source_kind}'",
"cl.searchable = true",
]
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"]
h_params: list = [query_embedding, limit]
c_params: list = [query_embedding, limit]
h_idx = 3
@@ -3508,8 +3513,9 @@ async def search_precedent_library_lexical(
halacha_filters = [
"h.review_status IN ('approved', 'published')",
f"cl.source_kind = '{source_kind}'",
"cl.searchable = true",
]
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"]
# $1 = query, $2 = limit. Filters append starting at $3.
h_params: list = [query, limit]
c_params: list = [query, limit]