From 3ae183009fe3fde9a310a8e1661a5ffbf04590ae Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 7 Jun 2026 20:59:49 +0000 Subject: [PATCH] =?UTF-8?q?feat(digests):=20self-heal=20in=20drain=5Fdiges?= =?UTF-8?q?ts=20=E2=80=94=20auto-resume=20after=20quota/interruption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ה-cron של drain_digests הוא מנגנון ה-resume (pending-based, idempotent, host-side, לא תלוי בסשן). חיזוק: אם enrich נכשל באמצע (מכסת claude נגמרה) השורה נשארה 'completed' עם שדות ריקים → לא היתה מטופלת שוב. עכשיו drain מאפס בתחילתו כל digest 'completed' עם concept_tag ריק *וגם* underlying_citation ריק (= חילוץ שמעולם לא נחת; שורה תקינה תמיד מכילה לפחות מראה-מקום) → pending לריצה חוזרת. כך כל קטיעה/מכסה מתאוששת אוטומטית בריצת ה-cron הבאה. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/drain_digests.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scripts/drain_digests.py b/scripts/drain_digests.py index 5c3e479..c824a98 100644 --- a/scripts/drain_digests.py +++ b/scripts/drain_digests.py @@ -36,6 +36,20 @@ CONCURRENCY = int(os.environ.get("DIGEST_DRAIN_CONCURRENCY", "3")) async def main() -> int: pool = await db.get_pool() + # Self-heal: an enrich that failed mid-LLM (e.g. the local claude subscription + # window was exhausted) can leave a row 'completed' with no concept_tag AND no + # underlying_citation — a real digest always extracts at least a citation, so + # "both empty" means the extraction never landed. Reset those (only if analysis_text + # is non-empty) to 'pending' so this run retries them (idempotent auto-resume). + # Safe: successfully-enriched rows always have a concept_tag or citation. 
+ healed = await pool.execute( + "UPDATE digests SET extraction_status = 'pending' " + "WHERE extraction_status = 'completed' " + "AND coalesce(concept_tag,'') = '' AND coalesce(underlying_citation,'') = '' " + "AND coalesce(analysis_text,'') <> ''" + ) + if healed and healed != "UPDATE 0": + print(f"self-heal: reset failed-empty digests → pending ({healed})", flush=True) rows = await pool.fetch( "SELECT id FROM digests WHERE extraction_status = 'pending' ORDER BY created_at" )