# legal-ai/mcp-server/tests/test_extraction_queue_eligibility.py

"""Regression test for #140 — cited_only stubs must never enter the extraction
work queue.

``list_pending_extraction_requests`` must apply ``EXTRACTION_ELIGIBLE_PREDICATE``
so a citation-only stub (no full_text, no precedent_chunks) is excluded even if
it carries a stamped ``*_extraction_requested_at`` and a default 'pending'
status. The predicate is the single shared eligibility rule (#139 reuses it).

Runs OFFLINE — a fake pool captures the SQL and asserts the predicate is wired
into the WHERE clause (same style as test_halacha_reextract_preserves_approved).
"""

from __future__ import annotations

import asyncio

import pytest

from legal_mcp.services import db


class _FakePool:
    def __init__(self) -> None:
        self.fetched: list[str] = []

    async def fetch(self, sql: str, *args):  # noqa: ANN002
        self.fetched.append(sql)
        return []


@pytest.fixture()
def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool:
    """Replace ``db.get_pool`` with a coroutine that yields a recording fake pool.

    The ``_FakePool`` instance is returned so the test can inspect the SQL
    captured in its ``fetched`` list.
    """
    recorder = _FakePool()

    async def _fake_get_pool() -> _FakePool:
        # Always hand back the same instance so every query lands in one list.
        return recorder

    monkeypatch.setattr(db, "get_pool", _fake_get_pool)
    return recorder


def _norm(sql: str) -> str:
    return " ".join(sql.split())


def test_predicate_excludes_cited_only_and_requires_chunks() -> None:
    """The shared predicate excludes cited_only rows and mentions precedent_chunks with an EXISTS."""
    predicate = _norm(db.EXTRACTION_ELIGIBLE_PREDICATE)
    assert "source_kind <> 'cited_only'" in predicate
    assert "precedent_chunks" in predicate
    # Compare upper-cased so the keyword matches however the predicate spells it.
    assert "EXISTS" in predicate.upper()


@pytest.mark.parametrize("kind", ["metadata", "halacha"])
def test_list_pending_applies_eligibility_predicate(fake_pool: _FakePool, kind: str) -> None:
    """The queue query for ``kind`` embeds the eligibility predicate and its requested_at gate."""
    # Drive the coroutine on a private loop and always close it afterwards.
    event_loop = asyncio.new_event_loop()
    try:
        event_loop.run_until_complete(db.list_pending_extraction_requests(kind=kind))
    finally:
        event_loop.close()

    assert fake_pool.fetched, "expected a queue query"
    queue_sql = _norm(fake_pool.fetched[0])

    # The eligibility predicate must be ANDed into the queue WHERE clause.
    expected_predicate = _norm(db.EXTRACTION_ELIGIBLE_PREDICATE)
    assert expected_predicate in queue_sql, queue_sql

    # ...alongside the requested_at gate, for the correct kind.
    if kind == "metadata":
        requested_col = "metadata_extraction_requested_at"
    else:
        requested_col = "halacha_extraction_requested_at"
    assert f"{requested_col} IS NOT NULL" in queue_sql, queue_sql