From 6359363f13dcdfbe317c522652f6584d216a8829 Mon Sep 17 00:00:00 2001 From: Chaim Date: Thu, 11 Jun 2026 18:08:45 +0000 Subject: [PATCH] =?UTF-8?q?fix(storage):=20#106.5=20=E2=80=94=20serve=5Fbl?= =?UTF-8?q?ob=20probes=20S3=20sub-backend,=20not=20dual=20disk-OR-S3=20exi?= =?UTF-8?q?sts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit באג-אינטראקציה שהתגלה לפני ה-flip ל-dual: DualBackend.exists() מחזיר True אם הקובץ על **הדיסק או** ב-S3. serve_blob בדק backend.exists() ואז הנפיק presigned — כך שתחת dual, קובץ שקיים-רק-בדיסק (mirror שנכשל / מחוץ לסט-ההגירה) היה מקבל redirect ל-presigned-URL שמחזיר 404 מ-MinIO, במקום fallback-לדיסק. תיקון: serve_blob בודק קיום ב-**S3 ספציפית** — `s3 = getattr(backend, "s3", backend)` (DualBackend.s3, או ה-S3Backend עצמו תחת s3) — כך שקובץ disk-only נופל ל-FileResponse אמיתי. תואם-לאחור ל-filesystem/s3 (getattr מחזיר את ה-backend עצמו). invariants: INV-STG6 (presigned רק כשהאובייקט באמת ב-S3) · INV-G10 (אפס שינוי תחת filesystem). tests: 6 (2 חדשות — dual מ-S3-sub-backend present→redirect / absent→disk-fallback). py_compile OK. Co-Authored-By: Claude Opus 4.8 --- web/app.py | 13 ++++++++++--- web/tests/test_serve_blob.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/web/app.py b/web/app.py index d85cce0..ed8524d 100644 --- a/web/app.py +++ b/web/app.py @@ -2789,14 +2789,21 @@ async def serve_blob( from pathlib import Path as _Path backend = storage.get_storage() if backend.name != "filesystem": + # Presign ONLY when the object is actually in S3. Under ``dual`` the + # generic ``exists`` is disk-OR-S3, so a disk-only file (failed mirror / + # outside the migration set) would otherwise be redirected to a presigned + # URL that 404s on MinIO. Probe the S3 sub-backend specifically + # (DualBackend.s3 — or the S3 backend itself under ``s3``) so disk-only + # files fall back to a real disk read. + s3 = getattr(backend, "s3", backend) try: key = storage.normalize_key(path) - if await backend.exists(key, bucket=bucket): - url = await backend.presign_get(key, bucket=bucket, download_name=filename) + if await s3.exists(key, bucket=bucket): + url = await s3.presign_get(key, bucket=bucket, download_name=filename) return RedirectResponse(url, status_code=302) except Exception: # noqa: BLE001 — never 500 on a key/presign hiccup; fall back to disk logger.exception("serve_blob: presign failed for %s; falling back to disk", path) - # not in object storage (dual fallback) — serve the disk copy if present + # not in object storage (dual disk-fallback) — serve the disk copy if present if not _Path(path).exists(): raise HTTPException(404, "קובץ לא נמצא") return FileResponse(path, media_type=media_type, filename=filename) diff --git a/web/tests/test_serve_blob.py b/web/tests/test_serve_blob.py index 6ec8373..cf0c198 100644 --- a/web/tests/test_serve_blob.py +++ b/web/tests/test_serve_blob.py @@ -78,3 +78,34 @@ def test_s3_missing_no_disk_404(monkeypatch): with pytest.raises(HTTPException) as ei: _serve(monkeypatch, "s3", False, DATA_DIR / "audit" / "_nope.txt") assert ei.value.status_code == 404 + + +class _FakeDual: + """DualBackend stand-in: generic exists() is disk-OR-S3 (always True here), + but the .s3 sub-backend is S3-only. The fix must probe .s3, not exists().""" + name = "dual" + + def __init__(self, s3_has: bool) -> None: + self.s3 = _FakeBackend("s3", s3_has) + + async def exists(self, key, *, bucket) -> bool: # noqa: ANN001 — disk-or-S3 → True + return True + + async def presign_get(self, key, *, bucket, download_name=None) -> str: # noqa: ANN001 + return f"https://s3.example/WRONG/{key}" # must NOT be used (proves .s3 probed) + + +def test_dual_probes_s3_subbackend_not_generic_exists(blob, monkeypatch): + """Regression (DualBackend.exists is disk-OR-S3): a file on disk but NOT in + S3 must fall back to a disk FileResponse, never a presigned URL that 404s.""" + monkeypatch.setattr(storage, "get_storage", lambda: _FakeDual(s3_has=False)) + r = asyncio.new_event_loop().run_until_complete( + app.serve_blob(str(blob), media_type="text/plain", filename="x.txt")) + assert isinstance(r, FileResponse) # disk fallback, NOT a (broken) redirect + + +def test_dual_in_s3_uses_s3_subbackend(blob, monkeypatch): + monkeypatch.setattr(storage, "get_storage", lambda: _FakeDual(s3_has=True)) + r = asyncio.new_event_loop().run_until_complete( + app.serve_blob(str(blob), media_type="text/plain", filename="x.txt")) + assert isinstance(r, RedirectResponse)