fix(digests): enrich self-cleans duplicate-yomon rows (re-sent issues) #135

Merged
chaim merged 1 commits from worktree-digest-dup-yomon into main 2026-06-08 04:59:35 +00:00

View File

@@ -213,7 +213,23 @@ async def enrich_digest(digest_id: UUID | str, progress: ProgressCb | None = Non
fields["subject_tags"] = extracted["subject_tags"]
if fields:
await db.update_digest(digest_id, **fields)
try:
await db.update_digest(digest_id, **fields)
except Exception as e:
# The same yomon issue can arrive as two different PDFs (re-sent /
# forwarded twice → different bytes → content_hash dedup misses it),
# but the yomon_number is unique. The extracted number then collides
# on uq_digests_yomon_number. This row is a duplicate of an already-
# ingested yomon → drop it so it isn't retried forever by the cron.
if "uq_digests_yomon_number" in str(e):
await db.delete_digest(digest_id)
logger.info(
"digest %s is a duplicate yomon (%s) — deleted",
digest_id, fields.get("yomon_number"),
)
return {"status": "duplicate", "digest_id": str(digest_id),
"yomon_number": fields.get("yomon_number")}
raise
merged = await db.get_digest(digest_id)
await progress("embedding", 75, "מחשב embedding")