From e522555b1a51f66ca519943fadb27fd7a4f6de25 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 30 May 2026 22:02:16 +0000 Subject: [PATCH] test(reindex): failing tests for content-hash re-index (FU-3) --- mcp-server/tests/test_reindex_on_change.py | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 mcp-server/tests/test_reindex_on_change.py diff --git a/mcp-server/tests/test_reindex_on_change.py b/mcp-server/tests/test_reindex_on_change.py new file mode 100644 index 0000000..35c29c2 --- /dev/null +++ b/mcp-server/tests/test_reindex_on_change.py @@ -0,0 +1,82 @@ +"""FU-3: re-index on content change (offline, monkeypatched I/O).""" +from __future__ import annotations + +import asyncio +from uuid import uuid4 + +import pytest + +from legal_mcp.services import db, ingest + + +def _run(coro): + return asyncio.run(coro) + + +# ── content_hash is deterministic ────────────────────────────────────── +def test_content_hash_deterministic(): + h1 = db._content_hash("פסק דין כלשהו") + h2 = db._content_hash("פסק דין כלשהו") + assert h1 == h2 and len(h1) == 64 # sha256 hex + + +def test_content_hash_empty_is_blank(): + assert db._content_hash("") == "" + assert db._content_hash(None) == "" + + +def test_content_hash_changes_with_text(): + assert db._content_hash("alpha") != db._content_hash("beta") + + +# ── mark_indexed copies content_hash → indexed_hash ───────────────────── +def test_mark_indexed_executes_update(monkeypatch): + seen = {} + + class _Conn: + async def execute(self, q, *a): + seen["q"] = q; seen["args"] = a + async def __aenter__(self): return self + async def __aexit__(self, *a): return False + + class _Pool: + def acquire(self): return _Conn() + + async def _pool(): return _Pool() + monkeypatch.setattr(db, "get_pool", _pool) + + cid = uuid4() + _run(db.mark_indexed(cid)) + assert "indexed_hash" in seen["q"] and "content_hash" in seen["q"] + assert seen["args"][0] == cid + + +# ── reindex_case_law re-embeds from stored text, no extractor/LLM ─────── +def test_reindex_case_law_uses_stored_text(monkeypatch): + cid = uuid4() + calls = {"chunk_embed_store": [], "mark_indexed": []} + + async def _get_case_law(x): + return {"id": cid, "full_text": "טקסט שמור של ההחלטה"} + monkeypatch.setattr(ingest.db, "get_case_law", _get_case_law) + + async def _ces(case_law_id, text, page_offsets, page_count, progress): + calls["chunk_embed_store"].append((case_law_id, text)) + return 5 + monkeypatch.setattr(ingest, "_chunk_embed_store", _ces) + + async def _mark(x): + calls["mark_indexed"].append(x) + monkeypatch.setattr(ingest.db, "mark_indexed", _mark) + + out = _run(ingest.reindex_case_law(cid)) + assert out["chunks"] == 5 and out["reindexed"] is True + assert calls["chunk_embed_store"][0][1] == "טקסט שמור של ההחלטה" + assert calls["mark_indexed"] == [cid] + + +def test_reindex_case_law_missing_row_raises(monkeypatch): + async def _none(x): return None + monkeypatch.setattr(ingest.db, "get_case_law", _none) + with pytest.raises(ValueError, match="not found"): + _run(ingest.reindex_case_law(uuid4()))