Merge pull request 'fix(digests): enrich self-cleans duplicate-yomon rows (re-sent issues)' (#135) from worktree-digest-dup-yomon into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m43s
This commit was merged in pull request #135.
This commit is contained in:
@@ -213,7 +213,23 @@ async def enrich_digest(digest_id: UUID | str, progress: ProgressCb | None = Non
         fields["subject_tags"] = extracted["subject_tags"]
 
     if fields:
-        await db.update_digest(digest_id, **fields)
+        try:
+            await db.update_digest(digest_id, **fields)
+        except Exception as e:
+            # The same yomon issue can arrive as two different PDFs (re-sent /
+            # forwarded twice → different bytes → content_hash dedup misses it),
+            # but the yomon_number is unique. The extracted number then collides
+            # on uq_digests_yomon_number. This row is a duplicate of an already-
+            # ingested yomon → drop it so it isn't retried forever by the cron.
+            if "uq_digests_yomon_number" in str(e):
+                await db.delete_digest(digest_id)
+                logger.info(
+                    "digest %s is a duplicate yomon (%s) — deleted",
+                    digest_id, fields.get("yomon_number"),
+                )
+                return {"status": "duplicate", "digest_id": str(digest_id),
+                        "yomon_number": fields.get("yomon_number")}
+            raise
     merged = await db.get_digest(digest_id)
 
     await progress("embedding", 75, "מחשב embedding")
Reference in New Issue
Block a user