Merge pull request 'fix(metadata): לא להתיישב 'completed' בכשל-חילוץ-Gemini חולף (#138)' (#261) from worktree-metadata-no-settle-on-fail into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m42s
G12 Leak-Guard / leak-guard (push) Successful in 6s
Lint — undefined names / undefined-names (push) Successful in 11s

This commit was merged in pull request #261.
This commit is contained in:
2026-06-15 03:35:47 +00:00
3 changed files with 86 additions and 4 deletions

View File

@@ -291,10 +291,16 @@ async def process_pending_extractions(kind: str = "metadata", limit: int = 20) -
if result.get("status") == "extraction_failed":
await db.set_case_law_halacha_status(cid, "failed")
await db.clear_extraction_request(cid, kind=kind)
elif result.get("status") == "extraction_failed":
# metadata transient failure (Gemini hiccup despite full text) —
# do NOT settle 'completed' or the row is silently stranded with
# empty metadata and the drain never revisits it (#138). Revert
# to 'pending' (the queue timestamp is preserved) so it re-drains.
await db.set_case_law_metadata_status(cid, "pending")
else:
# metadata — set terminal 'completed' status (also clears the
# request timestamp) so the UI badge settles instead of
# lingering on 'processing'.
# metadata success / no_changes / no_metadata(no text) — set
# terminal 'completed' (also clears the request timestamp) so the
# UI badge settles instead of lingering on 'processing'.
await db.set_case_law_metadata_status(cid, "completed")
processed += 1
results.append({

View File

@@ -428,7 +428,20 @@ async def extract_and_apply(
"""Convenience wrapper: extract → merge into row → return summary."""
suggested = await extract_metadata(case_law_id)
if not suggested:
return {"status": "no_metadata", "fields": []}
# Empty result has two very different meanings (#138): the precedent has
# NO text to extract from (permanent — nothing the queue can ever do), vs
# the Gemini call FAILED despite the row having full text (transient — a
# key/network/rate-limit hiccup that a retry can recover). Conflating
# them as 'no_metadata' let the drain settle the row to 'completed' on a
# transient failure, silently stranding it with empty metadata. Branch on
# whether text was actually present so the caller can retry the transient
# case and only settle the genuinely-empty one.
record = await db.get_case_law(case_law_id)
has_text = bool(((record or {}).get("full_text") or "").strip())
return {
"status": "extraction_failed" if has_text else "no_metadata",
"fields": [],
}
result = await apply_to_record(case_law_id, suggested, overwrite_case_number=overwrite_case_number)
if result["updated"]:
await db.recompute_searchable(case_law_id)