fix(digests): enrich self-cleans duplicate-yomon rows (re-sent issues) #135
@@ -213,7 +213,23 @@ async def enrich_digest(digest_id: UUID | str, progress: ProgressCb | None = Non
|
||||
fields["subject_tags"] = extracted["subject_tags"]
|
||||
|
||||
if fields:
|
||||
await db.update_digest(digest_id, **fields)
|
||||
try:
|
||||
await db.update_digest(digest_id, **fields)
|
||||
except Exception as e:
|
||||
# The same yomon issue can arrive as two different PDFs (re-sent /
|
||||
# forwarded twice → different bytes → content_hash dedup misses it),
|
||||
# but the yomon_number is unique. The extracted number then collides
|
||||
# on uq_digests_yomon_number. This row is a duplicate of an already-
|
||||
# ingested yomon → drop it so it isn't retried forever by the cron.
|
||||
if "uq_digests_yomon_number" in str(e):
|
||||
await db.delete_digest(digest_id)
|
||||
logger.info(
|
||||
"digest %s is a duplicate yomon (%s) — deleted",
|
||||
digest_id, fields.get("yomon_number"),
|
||||
)
|
||||
return {"status": "duplicate", "digest_id": str(digest_id),
|
||||
"yomon_number": fields.get("yomon_number")}
|
||||
raise
|
||||
merged = await db.get_digest(digest_id)
|
||||
|
||||
await progress("embedding", 75, "מחשב embedding")
|
||||
|
||||
Reference in New Issue
Block a user