"""Regression test for #140 — cited_only stubs must never enter the extraction work queue. ``list_pending_extraction_requests`` must apply ``EXTRACTION_ELIGIBLE_PREDICATE`` so a citation-only stub (no full_text, no precedent_chunks) is excluded even if it carries a stamped ``*_extraction_requested_at`` and a default 'pending' status. The predicate is the single shared eligibility rule (#139 reuses it). Runs OFFLINE — a fake pool captures the SQL and asserts the predicate is wired into the WHERE clause (same style as test_halacha_reextract_preserves_approved). """ from __future__ import annotations import asyncio import pytest from legal_mcp.services import db class _FakePool: def __init__(self) -> None: self.fetched: list[str] = [] async def fetch(self, sql: str, *args): # noqa: ANN002 self.fetched.append(sql) return [] @pytest.fixture() def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool: pool = _FakePool() async def _get_pool() -> _FakePool: return pool monkeypatch.setattr(db, "get_pool", _get_pool) return pool def _norm(sql: str) -> str: return " ".join(sql.split()) def test_predicate_excludes_cited_only_and_requires_chunks() -> None: pred = _norm(db.EXTRACTION_ELIGIBLE_PREDICATE) assert "source_kind <> 'cited_only'" in pred assert "precedent_chunks" in pred and "EXISTS" in pred.upper() @pytest.mark.parametrize("kind", ["metadata", "halacha"]) def test_list_pending_applies_eligibility_predicate(fake_pool: _FakePool, kind: str) -> None: loop = asyncio.new_event_loop() try: loop.run_until_complete(db.list_pending_extraction_requests(kind=kind)) finally: loop.close() assert fake_pool.fetched, "expected a queue query" sql = _norm(fake_pool.fetched[0]) # The eligibility predicate must be ANDed into the queue WHERE clause. assert _norm(db.EXTRACTION_ELIGIBLE_PREDICATE) in sql, sql # ...alongside the requested_at gate, for the correct kind. col = "metadata_extraction_requested_at" if kind == "metadata" else "halacha_extraction_requested_at" assert f"{col} IS NOT NULL" in sql, sql