From a92f543e7fb44dd79cffee626423553d1a9f09ac Mon Sep 17 00:00:00 2001
From: Chaim <chaim@marcus-law.co.il>
Date: Mon, 8 Jun 2026 07:24:05 +0000
Subject: [PATCH] feat(bulletins): staging endpoint /api/bulletins/upload
 (download archive first)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

העלון החודשי "עו"ד על נדל"ן" הוא פרסום נפרד מהיומון היומי (חודשי, רב-נושאי).
לפני תכנון הקטלוג — נוריד את כל הארכיון (~29) לתיקייה. endpoint זה רק מ-stage
את ה-PDF ל-data/bulletins/incoming (ללא DB), dedup לפי content_hash. n8n ימשוך
מ-chaim.marcus@gmail (subject "עו"ד על נדל"ן") וישלח לכאן.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 web/app.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/web/app.py b/web/app.py
index 92475d8..2c091c9 100644
--- a/web/app.py
+++ b/web/app.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import hashlib
 import json
 import logging
 import os
@@ -6094,6 +6095,35 @@ async def digest_unlink(digest_id: str):
         raise HTTPException(404, str(e))
 
 
+# ── Monthly bulletins ("עו"ד על נדל"ן") — staging-only (pre-catalog) ──
+# A SEPARATE publication from the daily "כל יום" digests: a monthly, multi-topic
+# real-estate-law newsletter. Its catalog/processing is not designed yet, so uploads
+# are only STAGED here (no DB): download the whole back-archive first, then plan
+# how to enrich the corpus. The directory is created on first upload, not at import.
+_BULLETINS_DIR = config.DATA_DIR / "bulletins" / "incoming"
+
+
+@app.post("/api/bulletins/upload")
+async def bulletin_upload(file: UploadFile = File(...)):
+    """Stage an upload as data/bulletins/incoming/<sha256[:12]>_<safe name> (no DB);
+    raises HTTPException 400 (suffix not allowed) or 413 (over MAX_FILE_SIZE)."""
+    suffix = Path(file.filename or "").suffix.lower()
+    if suffix not in ALLOWED_EXTENSIONS:
+        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")
+    content = await file.read()
+    if len(content) > MAX_FILE_SIZE:
+        raise HTTPException(413, "קובץ גדול מדי")
+    digest = hashlib.sha256(content).hexdigest()[:12]
+    _BULLETINS_DIR.mkdir(parents=True, exist_ok=True)
+    safe = re.sub(r"[^\w.\-+א-ת ]", "_", Path(file.filename or "bulletin.pdf").name)
+    dest = _BULLETINS_DIR / f"{digest}_{safe}"
+    # Idempotent: same bytes staged under ANY name → report the file actually on disk.
+    if (staged := next(_BULLETINS_DIR.glob(f"{digest}_*"), None)) is not None:
+        return {"status": "exists", "filename": staged.name, "size": len(content)}
+    dest.write_bytes(content)
+    return {"status": "stored", "filename": dest.name, "size": len(content)}
+
+
 from legal_mcp.services import internal_decisions as int_decisions_service  # noqa: E402
 
 
-- 
2.49.1