From a3a02ca67a2c4b50318eb1ec3d7e5bc42ab41642 Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 8 Jun 2026 04:59:12 +0000 Subject: [PATCH] fix(digests): enrich self-cleans duplicate-yomon rows (re-sent issues) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit אותו יומון יכול להגיע כשני PDF שונים (re-send/forward → בייטים שונים → content_hash dedup מפספס), אבל yomon_number ייחודי → ה-update ב-enrich מתנגש על uq_digests_yomon_number. עכשיו enrich תופס את ההתנגשות, מוחק את השורה הכפולה (היומון כבר קיים), ומחזיר status='duplicate' — כך ה-cron לא מנסה אותה שוב ושוב. סוגר לולאת-retry אינסופית פוטנציאלית במערכת הלא-מאוישת. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/legal_mcp/services/digest_library.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mcp-server/src/legal_mcp/services/digest_library.py b/mcp-server/src/legal_mcp/services/digest_library.py index 92ef89e..b22fe25 100644 --- a/mcp-server/src/legal_mcp/services/digest_library.py +++ b/mcp-server/src/legal_mcp/services/digest_library.py @@ -213,7 +213,23 @@ async def enrich_digest(digest_id: UUID | str, progress: ProgressCb | None = Non fields["subject_tags"] = extracted["subject_tags"] if fields: - await db.update_digest(digest_id, **fields) + try: + await db.update_digest(digest_id, **fields) + except Exception as e: + # The same yomon issue can arrive as two different PDFs (re-sent / + # forwarded twice → different bytes → content_hash dedup misses it), + # but the yomon_number is unique. The extracted number then collides + # on uq_digests_yomon_number. This row is a duplicate of an already- + # ingested yomon → drop it so it isn't retried forever by the cron. + if "uq_digests_yomon_number" in str(e): + await db.delete_digest(digest_id) + logger.info( + "digest %s is a duplicate yomon (%s) — deleted", + digest_id, fields.get("yomon_number"), + ) + return {"status": "duplicate", "digest_id": str(digest_id), + "yomon_number": fields.get("yomon_number")} + raise merged = await db.get_digest(digest_id) await progress("embedding", 75, "מחשב embedding") -- 2.49.1