fix(digests): enrich self-cleans duplicate-yomon rows (re-sent issues)
אותו יומון יכול להגיע כשני PDF שונים (re-send/forward → בייטים שונים → content_hash dedup מפספס), אבל yomon_number ייחודי → ה-update ב-enrich מתנגש על uq_digests_yomon_number. עכשיו enrich תופס את ההתנגשות, מוחק את השורה הכפולה (היומון כבר קיים), ומחזיר status='duplicate' — כך ה-cron לא מנסה אותה שוב ושוב. סוגר לולאת-retry אינסופית פוטנציאלית במערכת הלא-מאוישת.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -213,7 +213,23 @@ async def enrich_digest(digest_id: UUID | str, progress: ProgressCb | None = Non
|
|||||||
fields["subject_tags"] = extracted["subject_tags"]
|
fields["subject_tags"] = extracted["subject_tags"]
|
||||||
|
|
||||||
if fields:
|
if fields:
|
||||||
|
try:
|
||||||
await db.update_digest(digest_id, **fields)
|
await db.update_digest(digest_id, **fields)
|
||||||
|
except Exception as e:
|
||||||
|
# The same yomon issue can arrive as two different PDFs (re-sent /
|
||||||
|
# forwarded twice → different bytes → content_hash dedup misses it),
|
||||||
|
# but the yomon_number is unique. The extracted number then collides
|
||||||
|
# on uq_digests_yomon_number. This row is a duplicate of an already-
|
||||||
|
# ingested yomon → drop it so it isn't retried forever by the cron.
|
||||||
|
if "uq_digests_yomon_number" in str(e):
|
||||||
|
await db.delete_digest(digest_id)
|
||||||
|
logger.info(
|
||||||
|
"digest %s is a duplicate yomon (%s) — deleted",
|
||||||
|
digest_id, fields.get("yomon_number"),
|
||||||
|
)
|
||||||
|
return {"status": "duplicate", "digest_id": str(digest_id),
|
||||||
|
"yomon_number": fields.get("yomon_number")}
|
||||||
|
raise
|
||||||
merged = await db.get_digest(digest_id)
|
merged = await db.get_digest(digest_id)
|
||||||
|
|
||||||
await progress("embedding", 75, "מחשב embedding")
|
await progress("embedding", 75, "מחשב embedding")
|
||||||
|
|||||||
Reference in New Issue
Block a user