FU-2a: idempotent ingest + write-time normalization + searchable flag (GAP-03/06/13) #12
@@ -2735,6 +2735,10 @@ def _compute_searchable(row: dict, has_embedded_chunk: bool) -> bool:
|
|||||||
return False
|
return False
|
||||||
if not (row.get("case_name") or "").strip():
|
if not (row.get("case_name") or "").strip():
|
||||||
return False
|
return False
|
||||||
|
# practice_area is required only for identifier-keyed corpora (internal
|
||||||
|
# committee decisions, active cases). External precedents (e.g. בג"ץ) are
|
||||||
|
# legitimately cross-domain and may have no single practice_area.
|
||||||
|
if (row.get("source_kind") or "") != "external_upload":
|
||||||
if not (row.get("practice_area") or "").strip():
|
if not (row.get("practice_area") or "").strip():
|
||||||
return False
|
return False
|
||||||
if not (row.get("source_kind") or "").strip():
|
if not (row.get("source_kind") or "").strip():
|
||||||
@@ -3272,8 +3276,9 @@ async def search_precedent_library_semantic(
|
|||||||
halacha_filters = [
|
halacha_filters = [
|
||||||
"h.review_status IN ('approved', 'published')",
|
"h.review_status IN ('approved', 'published')",
|
||||||
f"cl.source_kind = '{source_kind}'",
|
f"cl.source_kind = '{source_kind}'",
|
||||||
|
"cl.searchable = true",
|
||||||
]
|
]
|
||||||
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"]
|
||||||
h_params: list = [query_embedding, limit]
|
h_params: list = [query_embedding, limit]
|
||||||
c_params: list = [query_embedding, limit]
|
c_params: list = [query_embedding, limit]
|
||||||
h_idx = 3
|
h_idx = 3
|
||||||
@@ -3508,8 +3513,9 @@ async def search_precedent_library_lexical(
|
|||||||
halacha_filters = [
|
halacha_filters = [
|
||||||
"h.review_status IN ('approved', 'published')",
|
"h.review_status IN ('approved', 'published')",
|
||||||
f"cl.source_kind = '{source_kind}'",
|
f"cl.source_kind = '{source_kind}'",
|
||||||
|
"cl.searchable = true",
|
||||||
]
|
]
|
||||||
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
|
chunk_filters = [f"cl.source_kind = '{source_kind}'", "cl.searchable = true"]
|
||||||
# $1 = query, $2 = limit. Filters append starting at $3.
|
# $1 = query, $2 = limit. Filters append starting at $3.
|
||||||
h_params: list = [query, limit]
|
h_params: list = [query, limit]
|
||||||
c_params: list = [query, limit]
|
c_params: list = [query, limit]
|
||||||
|
|||||||
@@ -123,6 +123,9 @@ async def get_dashboard() -> dict:
|
|||||||
total_corpus = await conn.fetchval("SELECT COUNT(*) FROM style_corpus")
|
total_corpus = await conn.fetchval("SELECT COUNT(*) FROM style_corpus")
|
||||||
total_patterns = await conn.fetchval("SELECT COUNT(*) FROM style_patterns")
|
total_patterns = await conn.fetchval("SELECT COUNT(*) FROM style_patterns")
|
||||||
total_case_law = await conn.fetchval("SELECT COUNT(*) FROM case_law")
|
total_case_law = await conn.fetchval("SELECT COUNT(*) FROM case_law")
|
||||||
|
non_searchable_case_law = await conn.fetchval(
|
||||||
|
"SELECT COUNT(*) FROM case_law WHERE NOT searchable"
|
||||||
|
)
|
||||||
|
|
||||||
# QA summary
|
# QA summary
|
||||||
qa_total = await conn.fetchval("SELECT COUNT(DISTINCT case_id) FROM qa_results")
|
qa_total = await conn.fetchval("SELECT COUNT(DISTINCT case_id) FROM qa_results")
|
||||||
@@ -154,6 +157,7 @@ async def get_dashboard() -> dict:
|
|||||||
"style_corpus": total_corpus,
|
"style_corpus": total_corpus,
|
||||||
"style_patterns": total_patterns,
|
"style_patterns": total_patterns,
|
||||||
"case_law_entries": total_case_law,
|
"case_law_entries": total_case_law,
|
||||||
|
"non_searchable_case_law": non_searchable_case_law,
|
||||||
},
|
},
|
||||||
"cases_by_status": cases_by_status,
|
"cases_by_status": cases_by_status,
|
||||||
"qa": {
|
"qa": {
|
||||||
|
|||||||
@@ -70,6 +70,15 @@ def test_compute_searchable_false_without_core_fields():
|
|||||||
assert db._compute_searchable(row, has_embedded_chunk=True) is False
|
assert db._compute_searchable(row, has_embedded_chunk=True) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_compute_searchable_external_allows_empty_practice_area():
|
||||||
|
# External precedents (e.g. בג"ץ) are cross-domain — empty practice_area
|
||||||
|
# must NOT disqualify them, as long as the rest of the contract holds.
|
||||||
|
row = _complete_row()
|
||||||
|
row["source_kind"] = "external_upload"
|
||||||
|
row["practice_area"] = ""
|
||||||
|
assert db._compute_searchable(row, has_embedded_chunk=True) is True
|
||||||
|
|
||||||
|
|
||||||
# ── ingest wires in recompute_searchable (both types) ──────────────────
|
# ── ingest wires in recompute_searchable (both types) ──────────────────
|
||||||
def test_ingest_calls_recompute_searchable(monkeypatch, tmp_path):
|
def test_ingest_calls_recompute_searchable(monkeypatch, tmp_path):
|
||||||
calls = {"recompute": [], "meta": [], "hal": []}
|
calls = {"recompute": [], "meta": [], "hal": []}
|
||||||
|
|||||||
Reference in New Issue
Block a user