fix(extraction): reconcile לתיקים-יתומים בתור — pending+requested_at=NULL (#139)
תיק יכול להיות <kind>_extraction_status='pending' עם _requested_at=NULL — מעולם-לא-נכנס-לתור (מסלולי bulk/מיגרציה, או status שנכתב לפני החותם), והדריינר (סורק requested_at IS NOT NULL) עיוור אליו לנצח → ה-backlog מתנקז בשקט לאפס. requeue_stale_processing_extractions מרפא רק 'processing'. נצפה 2026-06-14: 96 תיקים pending אך 0 בתור (תוקנו ידנית).

תיקון (G1 — שחזור invariant במקור, G2 — predicate יחיד):
- db.reconcile_orphaned_pending_extractions(kind=) — kind-agnostic, מחזיר את invariant "שורה ברת-חילוץ ⇒ בתור": חותם requested_at ל-rows שהם pending + requested_at IS NULL + EXTRACTION_ELIGIBLE_PREDICATE (אותו מסנן של #140 — cited_only/chunkless לעולם לא נדחפים). אידמפוטנטי (rows מסומנים לא נתפסים).
- precedent_library.process_pending_extractions קורא reconcile אחרי requeue_stale ולפני list — תיקים-משוחזרים נקלטים באותו pass. מנגנון-ריפוי יחיד (G2), לא מסלול מקביל; requeue_stale='processing', reconcile='pending'.
- request_halacha_extraction מציב status='pending' עם החותם (סימטרי ל-metadata) — סוגר את חלון-ה-drift שמייצר pending+NULL מלכתחילה.

מצב חי נקי (0 יתומים-כשירים אחרי התיקון-הידני); זהו תיקון מונע — הדריינר יְרַפֵּא יתומים עתידיים אוטומטית.

בדיקות: test_extraction_orphan_reconcile (predicate משותף, pending+NULL בלבד, מובחן מ-requeue_stale, request_halacha סימטרי), שני ה-kinds. כל 349 עוברות.

Invariants: G1, G2 (predicate משותף עם #140, ריפוי יחיד), INV-G3/INV-DUR1 (X16), INV-G4 (אין בליעה שקטה — reconcile מתעד), G12.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
85
mcp-server/tests/test_extraction_orphan_reconcile.py
Normal file
85
mcp-server/tests/test_extraction_orphan_reconcile.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Regression test for #139 — orphaned 'pending' extraction rows are reconciled.
|
||||
|
||||
A row can be ``<kind>_extraction_status='pending'`` with
|
||||
``<kind>_extraction_requested_at IS NULL`` — never enqueued, invisible to the
|
||||
drain (which selects ``requested_at IS NOT NULL``). ``requeue_stale`` heals only
|
||||
'processing'. ``reconcile_orphaned_pending_extractions`` restores the
|
||||
"eligible ⇒ queued" invariant, kind-agnostic, reusing the SAME eligibility
|
||||
predicate as the queue reader (#140, G2) so cited_only/chunkless stubs are never
|
||||
proactively enqueued.
|
||||
|
||||
Runs OFFLINE — a fake pool captures executed SQL (same style as the sibling
|
||||
extraction-queue tests).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp.services import db
|
||||
|
||||
|
||||
class _FakePool:
    """Offline stand-in for the database pool that records executed SQL.

    Only ``execute`` is provided. Each call appends the SQL text to
    ``executed`` and reports the fixed status string ``"UPDATE 3"``.
    """

    def __init__(self) -> None:
        # SQL statements passed to ``execute``, in call order.
        self.executed: list[str] = []

    async def execute(self, sql: str, *args):  # noqa: ANN002
        """Record ``sql`` and return the canned status ``"UPDATE 3"``.

        Positional bind arguments are accepted and ignored.
        """
        self.executed.append(sql)
        return "UPDATE 3"
|
||||
|
||||
|
||||
@pytest.fixture()
def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool:
    """Patch ``db.get_pool`` so it hands back one shared ``_FakePool``.

    Returns that same pool so a test can inspect the SQL captured on it.
    The patch is applied through ``monkeypatch``, so it is undone when the
    test finishes.
    """
    pool = _FakePool()

    async def _get_pool() -> _FakePool:
        # Async replacement for ``db.get_pool``; always yields the one pool.
        return pool

    monkeypatch.setattr(db, "get_pool", _get_pool)
    return pool
|
||||
|
||||
|
||||
def _run(coro):
    """Run the coroutine ``coro`` to completion and return its result.

    ``asyncio.run`` creates a fresh event loop for the call and closes it
    afterwards, so each invocation is isolated from the others.
    """
    return asyncio.run(coro)
|
||||
|
||||
|
||||
def _norm(sql: str) -> str:
    """Collapse every run of whitespace in ``sql`` to a single space.

    Leading and trailing whitespace is dropped, so substring assertions do
    not depend on how the SQL text was wrapped or indented.
    """
    return " ".join(sql.split())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kind,status_col,req_col", [
    ("halacha", "halacha_extraction_status", "halacha_extraction_requested_at"),
    ("metadata", "metadata_extraction_status", "metadata_extraction_requested_at"),
])
def test_reconcile_targets_eligible_unstamped_pending(fake_pool, kind, status_col, req_col):
    """The reconcile SQL targets eligible, unstamped 'pending' rows per kind.

    The fake pool answers every statement with ``"UPDATE 3"``, and the call
    is expected to report 3.
    """
    n = _run(db.reconcile_orphaned_pending_extractions(kind=kind))
    assert n == 3
    sql = _norm(fake_pool.executed[0])
    # Only pending rows with NO queue stamp...
    assert f"{status_col} = 'pending'" in sql, sql
    assert f"{req_col} IS NULL" in sql, sql
    # ...and only EXTRACTION-eligible ones (shared #140 predicate, no parallel rule).
    assert _norm(db.EXTRACTION_ELIGIBLE_PREDICATE) in sql, sql
    # The statement stamps the queue timestamp.
    assert f"{req_col} = now()" in sql, sql
|
||||
|
||||
|
||||
def test_reconcile_distinct_from_requeue_stale(fake_pool):
    """reconcile handles 'pending'; requeue_stale handles 'processing' (separate)."""
    _run(db.reconcile_orphaned_pending_extractions(kind="halacha"))
    sql = _norm(fake_pool.executed[0])
    # The reconcile statement must not compare any column to 'processing'.
    assert "= 'processing'" not in sql, sql
|
||||
|
||||
|
||||
def test_request_halacha_sets_pending_status(fake_pool):
    """#139 drift fix: request_halacha_extraction writes status+stamp together."""
    _run(db.request_halacha_extraction("00000000-0000-0000-0000-000000000000"))
    sql = _norm(fake_pool.executed[0])
    # Both the queue stamp and the 'pending' status appear in the one statement.
    assert "halacha_extraction_requested_at = now()" in sql, sql
    assert "halacha_extraction_status = 'pending'" in sql, sql
|
||||
Reference in New Issue
Block a user