diff --git a/mcp-server/src/legal_mcp/services/digest_library.py b/mcp-server/src/legal_mcp/services/digest_library.py index 92ef89e..b22fe25 100644 --- a/mcp-server/src/legal_mcp/services/digest_library.py +++ b/mcp-server/src/legal_mcp/services/digest_library.py @@ -213,7 +213,23 @@ async def enrich_digest(digest_id: UUID | str, progress: ProgressCb | None = Non fields["subject_tags"] = extracted["subject_tags"] if fields: - await db.update_digest(digest_id, **fields) + try: + await db.update_digest(digest_id, **fields) + except Exception as e: + # The same yomon issue can arrive as two different PDFs (re-sent / + # forwarded twice → different bytes → content_hash dedup misses it), + # but the yomon_number is unique. The extracted number then collides + # on uq_digests_yomon_number. This row is a duplicate of an already- + # ingested yomon → drop it so it isn't retried forever by the cron. + if "uq_digests_yomon_number" in str(e): + await db.delete_digest(digest_id) + logger.info( + "digest %s is a duplicate yomon (%s) — deleted", + digest_id, fields.get("yomon_number"), + ) + return {"status": "duplicate", "digest_id": str(digest_id), + "yomon_number": fields.get("yomon_number")} + raise merged = await db.get_digest(digest_id) await progress("embedding", 75, "מחשב embedding")