Merge pull request 'fix(metadata): לא להתיישב 'completed' בכשל-חילוץ-Gemini חולף (#138)' (#261) from worktree-metadata-no-settle-on-fail into main
This commit was merged in pull request #261.
This commit is contained in:
@@ -291,10 +291,16 @@ async def process_pending_extractions(kind: str = "metadata", limit: int = 20) -
|
|||||||
if result.get("status") == "extraction_failed":
|
if result.get("status") == "extraction_failed":
|
||||||
await db.set_case_law_halacha_status(cid, "failed")
|
await db.set_case_law_halacha_status(cid, "failed")
|
||||||
await db.clear_extraction_request(cid, kind=kind)
|
await db.clear_extraction_request(cid, kind=kind)
|
||||||
|
elif result.get("status") == "extraction_failed":
|
||||||
|
# metadata transient failure (Gemini hiccup despite full text) —
|
||||||
|
# do NOT settle 'completed' or the row is silently stranded with
|
||||||
|
# empty metadata and the drain never revisits it (#138). Revert
|
||||||
|
# to 'pending' (the queue timestamp is preserved) so it re-drains.
|
||||||
|
await db.set_case_law_metadata_status(cid, "pending")
|
||||||
else:
|
else:
|
||||||
# metadata — set terminal 'completed' status (also clears the
|
# metadata success / no_changes / no_metadata(no text) — set
|
||||||
# request timestamp) so the UI badge settles instead of
|
# terminal 'completed' (also clears the request timestamp) so the
|
||||||
# lingering on 'processing'.
|
# UI badge settles instead of lingering on 'processing'.
|
||||||
await db.set_case_law_metadata_status(cid, "completed")
|
await db.set_case_law_metadata_status(cid, "completed")
|
||||||
processed += 1
|
processed += 1
|
||||||
results.append({
|
results.append({
|
||||||
|
|||||||
@@ -428,7 +428,20 @@ async def extract_and_apply(
|
|||||||
"""Convenience wrapper: extract → merge into row → return summary."""
|
"""Convenience wrapper: extract → merge into row → return summary."""
|
||||||
suggested = await extract_metadata(case_law_id)
|
suggested = await extract_metadata(case_law_id)
|
||||||
if not suggested:
|
if not suggested:
|
||||||
return {"status": "no_metadata", "fields": []}
|
# Empty result has two very different meanings (#138): the precedent has
|
||||||
|
# NO text to extract from (permanent — nothing the queue can ever do), vs
|
||||||
|
# the Gemini call FAILED despite the row having full text (transient — a
|
||||||
|
# key/network/rate-limit hiccup that a retry can recover). Conflating
|
||||||
|
# them as 'no_metadata' let the drain settle the row to 'completed' on a
|
||||||
|
# transient failure, silently stranding it with empty metadata. Branch on
|
||||||
|
# whether text was actually present so the caller can retry the transient
|
||||||
|
# case and only settle the genuinely-empty one.
|
||||||
|
record = await db.get_case_law(case_law_id)
|
||||||
|
has_text = bool(((record or {}).get("full_text") or "").strip())
|
||||||
|
return {
|
||||||
|
"status": "extraction_failed" if has_text else "no_metadata",
|
||||||
|
"fields": [],
|
||||||
|
}
|
||||||
result = await apply_to_record(case_law_id, suggested, overwrite_case_number=overwrite_case_number)
|
result = await apply_to_record(case_law_id, suggested, overwrite_case_number=overwrite_case_number)
|
||||||
if result["updated"]:
|
if result["updated"]:
|
||||||
await db.recompute_searchable(case_law_id)
|
await db.recompute_searchable(case_law_id)
|
||||||
|
|||||||
63
mcp-server/tests/test_metadata_extract_failure_status.py
Normal file
63
mcp-server/tests/test_metadata_extract_failure_status.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
"""Regression test for #138 — metadata extraction must distinguish a transient
|
||||||
|
failure (Gemini hiccup despite the row having text) from a permanent empty
|
||||||
|
(no text to extract). Conflating them as 'no_metadata' let the drain settle the
|
||||||
|
row to 'completed' and silently strand it with empty metadata.
|
||||||
|
|
||||||
|
``extract_and_apply`` returns:
|
||||||
|
* ``extraction_failed`` when ``extract_metadata`` yields nothing BUT the row
|
||||||
|
has full_text → retryable.
|
||||||
|
* ``no_metadata`` when the row has no text → genuinely nothing to do.
|
||||||
|
|
||||||
|
Runs fully OFFLINE — monkeypatches ``extract_metadata`` and ``db.get_case_law``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from legal_mcp.services import db, precedent_metadata_extractor as pme
|
||||||
|
|
||||||
|
|
||||||
|
def _run(coro):
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
try:
|
||||||
|
return loop.run_until_complete(coro)
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def empty_extract(monkeypatch: pytest.MonkeyPatch):
|
||||||
|
async def _empty(_cid):
|
||||||
|
return {}
|
||||||
|
monkeypatch.setattr(pme, "extract_metadata", _empty)
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_result_with_text_is_transient_failure(empty_extract, monkeypatch):
|
||||||
|
async def _rec(_cid):
|
||||||
|
return {"full_text": "פסק דין ארוך עם תוכן ממשי לחילוץ"}
|
||||||
|
monkeypatch.setattr(db, "get_case_law", _rec)
|
||||||
|
|
||||||
|
out = _run(pme.extract_and_apply(uuid4()))
|
||||||
|
assert out["status"] == "extraction_failed", out
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_result_without_text_is_no_metadata(empty_extract, monkeypatch):
|
||||||
|
async def _rec(_cid):
|
||||||
|
return {"full_text": ""}
|
||||||
|
monkeypatch.setattr(db, "get_case_law", _rec)
|
||||||
|
|
||||||
|
out = _run(pme.extract_and_apply(uuid4()))
|
||||||
|
assert out["status"] == "no_metadata", out
|
||||||
|
|
||||||
|
|
||||||
|
def test_missing_record_is_no_metadata(empty_extract, monkeypatch):
|
||||||
|
async def _none(_cid):
|
||||||
|
return None
|
||||||
|
monkeypatch.setattr(db, "get_case_law", _none)
|
||||||
|
|
||||||
|
out = _run(pme.extract_and_apply(uuid4()))
|
||||||
|
assert out["status"] == "no_metadata", out
|
||||||
Reference in New Issue
Block a user