"""Regression test for #138 — metadata extraction must distinguish a transient failure (Gemini hiccup despite the row having text) from a permanent empty (no text to extract). Conflating them as 'no_metadata' let the drain settle the row to 'completed' and silently strand it with empty metadata. ``extract_and_apply`` returns: * ``extraction_failed`` when ``extract_metadata`` yields nothing BUT the row has full_text → retryable. * ``no_metadata`` when the row has no text → genuinely nothing to do. Runs fully OFFLINE — monkeypatches ``extract_metadata`` and ``db.get_case_law``. """ from __future__ import annotations import asyncio from uuid import uuid4 import pytest from legal_mcp.services import db, precedent_metadata_extractor as pme def _run(coro): loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) finally: loop.close() @pytest.fixture() def empty_extract(monkeypatch: pytest.MonkeyPatch): async def _empty(_cid): return {} monkeypatch.setattr(pme, "extract_metadata", _empty) def test_empty_result_with_text_is_transient_failure(empty_extract, monkeypatch): async def _rec(_cid): return {"full_text": "פסק דין ארוך עם תוכן ממשי לחילוץ"} monkeypatch.setattr(db, "get_case_law", _rec) out = _run(pme.extract_and_apply(uuid4())) assert out["status"] == "extraction_failed", out def test_empty_result_without_text_is_no_metadata(empty_extract, monkeypatch): async def _rec(_cid): return {"full_text": ""} monkeypatch.setattr(db, "get_case_law", _rec) out = _run(pme.extract_and_apply(uuid4())) assert out["status"] == "no_metadata", out def test_missing_record_is_no_metadata(empty_extract, monkeypatch): async def _none(_cid): return None monkeypatch.setattr(db, "get_case_law", _none) out = _run(pme.extract_and_apply(uuid4())) assert out["status"] == "no_metadata", out