"""FU-3: re-index on content change (offline, monkeypatched I/O).""" from __future__ import annotations import asyncio from uuid import uuid4 import pytest from legal_mcp.services import db, ingest def _run(coro): return asyncio.run(coro) # ── content_hash is deterministic ────────────────────────────────────── def test_content_hash_deterministic(): h1 = db._content_hash("פסק דין כלשהו") h2 = db._content_hash("פסק דין כלשהו") assert h1 == h2 and len(h1) == 64 # sha256 hex def test_content_hash_empty_is_blank(): assert db._content_hash("") == "" assert db._content_hash(None) == "" def test_content_hash_changes_with_text(): assert db._content_hash("alpha") != db._content_hash("beta") # ── mark_indexed copies content_hash → indexed_hash ───────────────────── def test_mark_indexed_executes_update(monkeypatch): seen = {} class _Conn: async def execute(self, q, *a): seen["q"] = q; seen["args"] = a async def __aenter__(self): return self async def __aexit__(self, *a): return False class _Pool: def acquire(self): return _Conn() async def _pool(): return _Pool() monkeypatch.setattr(db, "get_pool", _pool) cid = uuid4() _run(db.mark_indexed(cid)) assert "indexed_hash" in seen["q"] and "content_hash" in seen["q"] assert seen["args"][0] == cid # ── reindex_case_law re-embeds from stored text, no extractor/LLM ─────── def test_reindex_case_law_uses_stored_text(monkeypatch): cid = uuid4() calls = {"chunk_embed_store": [], "mark_indexed": []} async def _get_case_law(x): return {"id": cid, "full_text": "טקסט שמור של ההחלטה"} monkeypatch.setattr(ingest.db, "get_case_law", _get_case_law) async def _ces(case_law_id, text, page_offsets, page_count, progress): calls["chunk_embed_store"].append((case_law_id, text)) return 5 monkeypatch.setattr(ingest, "_chunk_embed_store", _ces) async def _mark(x): calls["mark_indexed"].append(x) monkeypatch.setattr(ingest.db, "mark_indexed", _mark) out = _run(ingest.reindex_case_law(cid)) assert out["chunks"] == 5 and out["reindexed"] is True assert calls["chunk_embed_store"][0][1] == "טקסט שמור של ההחלטה" assert calls["mark_indexed"] == [cid] def test_reindex_case_law_missing_row_raises(monkeypatch): async def _none(x): return None monkeypatch.setattr(ingest.db, "get_case_law", _none) with pytest.raises(ValueError, match="not found"): _run(ingest.reindex_case_law(uuid4()))