"""Regression test for #139 — orphaned 'pending' extraction rows are reconciled. A row can be ``_extraction_status='pending'`` with ``_extraction_requested_at IS NULL`` — never enqueued, invisible to the drain (which selects ``requested_at IS NOT NULL``). ``requeue_stale`` heals only 'processing'. ``reconcile_orphaned_pending_extractions`` restores the "eligible ⇒ queued" invariant, kind-agnostic, reusing the SAME eligibility predicate as the queue reader (#140, G2) so cited_only/chunkless stubs are never proactively enqueued. Runs OFFLINE — a fake pool captures executed SQL (same style as the sibling extraction-queue tests). """ from __future__ import annotations import asyncio import pytest from legal_mcp.services import db class _FakePool: def __init__(self) -> None: self.executed: list[str] = [] async def execute(self, sql: str, *args): # noqa: ANN002 self.executed.append(sql) return "UPDATE 3" @pytest.fixture() def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool: pool = _FakePool() async def _get_pool() -> _FakePool: return pool monkeypatch.setattr(db, "get_pool", _get_pool) return pool def _run(coro): loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) finally: loop.close() def _norm(sql: str) -> str: return " ".join(sql.split()) @pytest.mark.parametrize("kind,status_col,req_col", [ ("halacha", "halacha_extraction_status", "halacha_extraction_requested_at"), ("metadata", "metadata_extraction_status", "metadata_extraction_requested_at"), ]) def test_reconcile_targets_eligible_unstamped_pending(fake_pool, kind, status_col, req_col): n = _run(db.reconcile_orphaned_pending_extractions(kind=kind)) assert n == 3 sql = _norm(fake_pool.executed[0]) # Only pending rows with NO queue stamp... assert f"{status_col} = 'pending'" in sql, sql assert f"{req_col} IS NULL" in sql, sql # ...and only EXTRACTION-eligible ones (shared #140 predicate — no parallel rule). assert _norm(db.EXTRACTION_ELIGIBLE_PREDICATE) in sql, sql # It stamps the queue + re-affirms pending. assert f"{req_col} = now()" in sql, sql def test_reconcile_distinct_from_requeue_stale(fake_pool): """reconcile handles 'pending'; requeue_stale handles 'processing' — separate.""" _run(db.reconcile_orphaned_pending_extractions(kind="halacha")) sql = _norm(fake_pool.executed[0]) assert "= 'processing'" not in sql, sql def test_request_halacha_sets_pending_status(fake_pool): """#139 drift fix — request_halacha_extraction writes status+stamp together.""" _run(db.request_halacha_extraction("00000000-0000-0000-0000-000000000000")) sql = _norm(fake_pool.executed[0]) assert "halacha_extraction_requested_at = now()" in sql, sql assert "halacha_extraction_status = 'pending'" in sql, sql