"""FU-2a: idempotent ingest + write-time normalization + searchable flag. Offline tests for the *pure* pieces (canonical normalization, completeness predicate) and ingest wiring. The real ON CONFLICT upsert is verified by a DB smoke test against localhost:5433 (see plan Task 6), since it requires a live Postgres partial unique index. """ from __future__ import annotations import asyncio from uuid import uuid4 import pytest from legal_mcp.services import db, ingest def _run(coro): return asyncio.run(coro) # ── GAP-06: canonical normalization (pure, deterministic) ────────────── @pytest.mark.parametrize("raw,expected", [ ("ערר 8137/24", "8137-24"), (" עע\"מ 1/20 ", "1-20"), ("8126-03-25", "8126-03-25"), # month segment preserved ("בל\"מ 1010-01-25", "1010-01-25"), ("8047/23", "8047-23"), ]) def test_canonical_case_number(raw, expected): assert db._canonical_case_number(raw) == expected def test_canonical_does_not_invent_month(): # No month in input → none added (X1 §1). assert db._canonical_case_number("8126/24") == "8126-24" # ── GAP-13: completeness predicate (pure) ────────────────────────────── def _complete_row(): return { "case_number": "8047-23", "case_name": "פלוני נ' הוועדה", "practice_area": "rishuy_uvniya", "source_kind": "internal_committee", "extraction_status": "completed", "headnote": "תקציר", "summary": "", "subject_tags": [], } def test_compute_searchable_true_when_complete(): assert db._compute_searchable(_complete_row(), has_embedded_chunk=True) is True def test_compute_searchable_false_without_embedded_chunk(): assert db._compute_searchable(_complete_row(), has_embedded_chunk=False) is False def test_compute_searchable_false_without_metadata(): row = _complete_row() row["headnote"] = ""; row["summary"] = ""; row["subject_tags"] = [] assert db._compute_searchable(row, has_embedded_chunk=True) is False def test_compute_searchable_false_when_extraction_incomplete(): row = _complete_row(); row["extraction_status"] = "pending" assert db._compute_searchable(row, has_embedded_chunk=True) is False def test_compute_searchable_false_without_core_fields(): row = _complete_row(); row["practice_area"] = "" assert db._compute_searchable(row, has_embedded_chunk=True) is False # ── ingest wires in recompute_searchable (both types) ────────────────── def test_ingest_calls_recompute_searchable(monkeypatch, tmp_path): calls = {"recompute": [], "meta": [], "hal": []} async def _extract_text(path): return ("text", 1, [0]) monkeypatch.setattr(ingest.extractor, "extract_text", _extract_text) monkeypatch.setattr(ingest.extractor, "strip_nevo_preamble", lambda t: t) monkeypatch.setattr(ingest.chunker, "chunk_document", lambda t, page_offsets=None: [type("C", (), { "chunk_index": 0, "content": "c", "section_type": "b", "page_number": 1})()]) async def _embed(texts, input_type="document"): return [[0.0] * 8 for _ in texts] monkeypatch.setattr(ingest.embeddings, "embed_texts", _embed) async def _store(cid, dicts): return len(dicts) monkeypatch.setattr(ingest.db, "store_precedent_chunks", _store) async def _create_internal(**kw): return {"id": uuid4()} monkeypatch.setattr(ingest.db, "create_internal_committee_decision", _create_internal) async def _noop(*a, **k): return None monkeypatch.setattr(ingest.db, "set_case_law_extraction_status", _noop) monkeypatch.setattr(ingest.db, "set_case_law_halacha_status", _noop) monkeypatch.setattr(ingest.db, "request_metadata_extraction", lambda cid: calls["meta"].append(cid) or _noop()) monkeypatch.setattr(ingest.db, "request_halacha_extraction", lambda cid: calls["hal"].append(cid) or _noop()) async def _recompute(cid): calls["recompute"].append(cid) monkeypatch.setattr(ingest.db, "recompute_searchable", _recompute) monkeypatch.setattr(ingest.config, "PARENT_DOC_RETRIEVAL_ENABLED", False) monkeypatch.setattr(ingest.config, "MULTIMODAL_ENABLED", False) from legal_mcp.services import internal_decisions _run(internal_decisions.ingest_internal_decision( case_number="8047/23", text="t", chair_name="x", practice_area="rishuy_uvniya")) assert len(calls["recompute"]) == 1, "ingest must recompute searchable after success"