diff --git a/scripts/drain_digests.py b/scripts/drain_digests.py index 5c3e479..c824a98 100644 --- a/scripts/drain_digests.py +++ b/scripts/drain_digests.py @@ -36,6 +36,20 @@ CONCURRENCY = int(os.environ.get("DIGEST_DRAIN_CONCURRENCY", "3")) async def main() -> int: pool = await db.get_pool() + # Self-heal: an enrich that failed mid-LLM (e.g. the local claude + # subscription window was exhausted) can leave a row 'completed' with no + # concept_tag AND no underlying_citation — a real digest always extracts at + # least a citation, so "both empty" means the extraction never landed. Reset + # those to 'pending' so the next run retries (idempotent auto-resume). Safe: + # successfully-enriched rows always have a concept_tag or citation. + healed = await pool.execute( + "UPDATE digests SET extraction_status = 'pending' " + "WHERE extraction_status = 'completed' " + "AND coalesce(concept_tag,'') = '' AND coalesce(underlying_citation,'') = '' " + "AND coalesce(analysis_text,'') <> ''" + ) + if healed and healed != "UPDATE 0": + print(f"self-heal: reset failed-empty digests → pending ({healed})", flush=True) rows = await pool.fetch( "SELECT id FROM digests WHERE extraction_status = 'pending' ORDER BY created_at" )