All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 6s
באג-אינטראקציה שהתגלה לפני ה-flip ל-dual: DualBackend.exists() מחזיר True אם הקובץ על **הדיסק או** ב-S3. serve_blob בדק backend.exists() ואז הנפיק presigned — כך שתחת dual, קובץ שקיים-רק-בדיסק (mirror שנכשל / מחוץ לסט-ההגירה) היה מקבל redirect ל-presigned-URL שמחזיר 404 מ-MinIO, במקום fallback-לדיסק. תיקון: serve_blob בודק קיום ב-**S3 ספציפית** — `s3 = getattr(backend, "s3", backend)` (DualBackend.s3, או ה-S3Backend עצמו תחת s3) — כך שקובץ disk-only נופל ל-FileResponse אמיתי. תואם-לאחור ל-filesystem/s3 (getattr מחזיר את ה-backend עצמו). invariants: INV-STG6 (presigned רק כשהאובייקט באמת ב-S3) · INV-G10 (אפס שינוי תחת filesystem). tests: 6 (2 חדשות — dual מ-S3-sub-backend present→redirect / absent→disk-fallback). py_compile OK. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
112 lines
4.3 KiB
Python
112 lines
4.3 KiB
Python
"""Tests for #106.5 — web.app.serve_blob backend routing (INV-STG6).
|
|
|
|
Verifies the tri-model-panel cutover-safety design (2026-06-11):
|
|
- filesystem → FileResponse from disk (behaviour-preserving, current prod).
|
|
- s3/dual → 302 redirect to a presigned URL when the object is in MinIO.
|
|
- dual + miss → disk fallback (covers files outside the DB-tracked migration set
|
|
— e.g. dynamically-built analysis DOCX / research markdown).
|
|
- s3 + miss + no disk → 404.
|
|
|
|
Importing web/app.py needs a few env vars (it wires the Paperclip pool at import);
|
|
they're set before import. Skips cleanly if the heavy import can't be satisfied.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
os.environ.setdefault("PAPERCLIP_DB_URL", "postgres://x:x@127.0.0.1:54329/paperclip")
|
|
os.environ.setdefault("DATA_DIR", "/home/chaim/legal-ai/data")
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1])) # web/
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "mcp-server" / "src")) # legal_mcp
|
|
|
|
app = pytest.importorskip("app", reason="web/app.py import prerequisites unavailable")
|
|
from fastapi.responses import FileResponse, RedirectResponse # noqa: E402
|
|
from legal_mcp.services import storage # noqa: E402
|
|
|
|
DATA_DIR = Path(os.environ["DATA_DIR"])
|
|
|
|
|
|
class _FakeBackend:
    """Storage-backend double whose ``exists()`` answer is fixed at construction.

    ``name`` is stored as the public ``name`` attribute; ``has`` is the value
    every ``exists()`` call returns.
    """

    def __init__(self, name: str, has: bool) -> None:
        self.name = name
        self._has = has

    async def exists(self, key, *, bucket) -> bool:  # noqa: ANN001
        """Return the canned presence flag; ``key`` and ``bucket`` are ignored."""
        present = self._has
        return present

    async def presign_get(self, key, *, bucket, download_name=None) -> str:  # noqa: ANN001
        """Return a fake presigned URL derived from ``key`` only."""
        fake_url = f"https://s3.example/{key}"
        return fake_url
|
|
|
|
|
|
@pytest.fixture()
def blob(tmp_path_factory, monkeypatch):
    """Yield a small real text file under ``DATA_DIR/audit``; remove it on teardown.

    The file is created UNDER DATA_DIR (not in a tmp dir) so that
    storage.normalize_key accepts its path.
    """
    target = DATA_DIR.joinpath("audit", "_serveblob_pytest.txt")
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("hi")
    yield target
    # teardown: tolerate the file already being gone
    target.unlink(missing_ok=True)
|
|
|
|
|
|
def _serve(monkeypatch, name, has, path):
    """Run ``app.serve_blob`` once against a ``_FakeBackend`` and return its result.

    Args:
        monkeypatch: pytest's monkeypatch fixture; used to replace
            ``storage.get_storage`` with a factory returning the fake backend.
        name: Backend name given to the fake (the tests here pass
            "filesystem", "s3" or "dual").
        has: Value the fake backend's ``exists()`` returns.
        path: Blob path; passed to ``serve_blob`` as ``str(path)``.

    Returns:
        Whatever ``app.serve_blob`` returns.

    Raises:
        Any exception raised by ``app.serve_blob`` propagates unchanged.
    """
    monkeypatch.setattr(storage, "get_storage", lambda: _FakeBackend(name, has))
    # One private loop per call. Close it explicitly: the previous
    # `asyncio.new_event_loop().run_until_complete(...)` form leaked the loop
    # (and its selector/self-pipe) on every invocation.
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(
            app.serve_blob(str(path), media_type="text/plain", filename="x.txt"))
    finally:
        loop.close()
|
|
|
|
|
|
def test_filesystem_serves_from_disk(blob, monkeypatch):
    """With the ``filesystem`` backend the blob comes back as a FileResponse."""
    response = _serve(monkeypatch, "filesystem", False, blob)
    assert isinstance(response, FileResponse)
|
|
|
|
|
|
def test_dual_in_s3_redirects_presigned(blob, monkeypatch):
    """With the ``dual`` backend and the object reported present, expect a redirect."""
    response = _serve(monkeypatch, "dual", True, blob)
    assert isinstance(response, RedirectResponse)
|
|
|
|
|
|
def test_dual_missing_falls_back_to_disk(blob, monkeypatch):
    """With the ``dual`` backend and the object reported absent, serve from disk."""
    # the panel's safety net: a file not yet in MinIO is still served from disk
    response = _serve(monkeypatch, "dual", False, blob)
    assert isinstance(response, FileResponse)
|
|
|
|
|
|
def test_s3_missing_no_disk_404(monkeypatch):
    """``s3`` backend, object reported absent, path not created on disk → HTTP 404."""
    from fastapi import HTTPException

    # This test does not use the `blob` fixture, so nothing here creates the file.
    missing = DATA_DIR / "audit" / "_nope.txt"
    with pytest.raises(HTTPException) as excinfo:
        _serve(monkeypatch, "s3", False, missing)
    # Checked after the `with` block so the assertion actually executes.
    assert excinfo.value.status_code == 404
|
|
|
|
|
|
class _FakeDual:
    """DualBackend stand-in: generic exists() is disk-OR-S3 (always True here),
    but the .s3 sub-backend is S3-only. The fix must probe .s3, not exists()."""

    name = "dual"

    def __init__(self, s3_has: bool) -> None:
        # S3-only view; its exists() answers with `s3_has`.
        s3_only = _FakeBackend("s3", s3_has)
        self.s3 = s3_only

    async def exists(self, key, *, bucket) -> bool:  # noqa: ANN001 — disk-or-S3 → True
        """Always report the object as present, regardless of ``key``/``bucket``."""
        return True

    async def presign_get(self, key, *, bucket, download_name=None) -> str:  # noqa: ANN001
        """Return a deliberately recognisable "WRONG" URL for ``key``."""
        wrong_url = f"https://s3.example/WRONG/{key}"  # must NOT be used (proves .s3 probed)
        return wrong_url
|
|
|
|
|
|
def test_dual_probes_s3_subbackend_not_generic_exists(blob, monkeypatch):
    """Regression (DualBackend.exists is disk-OR-S3): a file on disk but NOT in
    S3 must fall back to a disk FileResponse, never a presigned URL that 404s."""
    monkeypatch.setattr(storage, "get_storage", lambda: _FakeDual(s3_has=False))
    # Own the loop so it can be closed; the one-liner
    # `asyncio.new_event_loop().run_until_complete(...)` leaked it.
    loop = asyncio.new_event_loop()
    try:
        r = loop.run_until_complete(
            app.serve_blob(str(blob), media_type="text/plain", filename="x.txt"))
    finally:
        loop.close()
    assert isinstance(r, FileResponse)  # disk fallback, NOT a (broken) redirect
|
|
|
|
|
|
def test_dual_in_s3_uses_s3_subbackend(blob, monkeypatch):
    """With a dual backend whose ``.s3`` sub-backend reports the object present,
    serve_blob must answer with a redirect."""
    monkeypatch.setattr(storage, "get_storage", lambda: _FakeDual(s3_has=True))
    # Own the loop so it can be closed; the one-liner
    # `asyncio.new_event_loop().run_until_complete(...)` leaked it.
    loop = asyncio.new_event_loop()
    try:
        r = loop.run_until_complete(
            app.serve_blob(str(blob), media_type="text/plain", filename="x.txt"))
    finally:
        loop.close()
    # NOTE(review): only the response type is checked; the redirect target is
    # not asserted, so this does not by itself show which presign_get was used.
    assert isinstance(r, RedirectResponse)
|