feat(storage): #106.5 — חיווט-קריאה serve_blob (presigned + dual disk-fallback) #198
45
web/app.py
45
web/app.py
@@ -23,7 +23,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "
|
|||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
from fastapi import BackgroundTasks, Body, FastAPI, File, Form, HTTPException, UploadFile
|
from fastapi import BackgroundTasks, Body, FastAPI, File, Form, HTTPException, UploadFile
|
||||||
from fastapi.responses import FileResponse, StreamingResponse
|
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
|
||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ import asyncpg
|
|||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from legal_mcp import config
|
from legal_mcp import config
|
||||||
from legal_mcp.services import chunker, db, embeddings, extractor, git_sync, metrics as metrics_service, processor, proofreader, research_md
|
from legal_mcp.services import chunker, db, embeddings, extractor, git_sync, metrics as metrics_service, processor, proofreader, research_md, storage
|
||||||
from legal_mcp.tools import cases as cases_tools, search as search_tools, workflow as workflow_tools, drafting as drafting_tools, precedents as precedents_tools
|
from legal_mcp.tools import cases as cases_tools, search as search_tools, workflow as workflow_tools, drafting as drafting_tools, precedents as precedents_tools
|
||||||
from legal_mcp.tools.envelope import envelope_unwrap
|
from legal_mcp.tools.envelope import envelope_unwrap
|
||||||
|
|
||||||
@@ -2769,6 +2769,39 @@ async def api_local_files(case_number: str):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def serve_blob(
|
||||||
|
path, *, media_type: str, filename: str,
|
||||||
|
bucket=storage.Bucket.DOCUMENTS,
|
||||||
|
):
|
||||||
|
"""Serve a case blob, routing through the storage layer (#106.5, INV-STG6).
|
||||||
|
|
||||||
|
- ``filesystem`` backend → stream the file from disk (unchanged behaviour).
|
||||||
|
- ``s3``/``dual`` backend → if the object is in MinIO, 302-redirect to a
|
||||||
|
short-lived presigned URL (bytes go browser↔MinIO, never through FastAPI);
|
||||||
|
under ``dual`` a not-yet-migrated file (e.g. the dynamically-built analysis
|
||||||
|
DOCX or research markdown) transparently falls back to its disk copy. This
|
||||||
|
is the tri-model panel's agreed cutover-safety design (2026-06-11): dual +
|
||||||
|
disk-fallback covers files outside the DB-tracked migration set.
|
||||||
|
|
||||||
|
The caller still owns its own existence/path-safety checks; this only
|
||||||
|
decides HOW to serve a path it already validated.
|
||||||
|
"""
|
||||||
|
from pathlib import Path as _Path
|
||||||
|
backend = storage.get_storage()
|
||||||
|
if backend.name != "filesystem":
|
||||||
|
try:
|
||||||
|
key = storage.normalize_key(path)
|
||||||
|
if await backend.exists(key, bucket=bucket):
|
||||||
|
url = await backend.presign_get(key, bucket=bucket, download_name=filename)
|
||||||
|
return RedirectResponse(url, status_code=302)
|
||||||
|
except Exception: # noqa: BLE001 — never 500 on a key/presign hiccup; fall back to disk
|
||||||
|
logger.exception("serve_blob: presign failed for %s; falling back to disk", path)
|
||||||
|
# not in object storage (dual fallback) — serve the disk copy if present
|
||||||
|
if not _Path(path).exists():
|
||||||
|
raise HTTPException(404, "קובץ לא נמצא")
|
||||||
|
return FileResponse(path, media_type=media_type, filename=filename)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/cases/{case_number}/local-files/{folder}/{filename}")
|
@app.get("/api/cases/{case_number}/local-files/{folder}/{filename}")
|
||||||
async def api_read_local_file(case_number: str, folder: str, filename: str):
|
async def api_read_local_file(case_number: str, folder: str, filename: str):
|
||||||
"""Read contents of a local case file."""
|
"""Read contents of a local case file."""
|
||||||
@@ -2781,7 +2814,7 @@ async def api_read_local_file(case_number: str, folder: str, filename: str):
|
|||||||
path = case_dir / "documents" / folder / filename
|
path = case_dir / "documents" / folder / filename
|
||||||
if not path.exists() or not path.is_file():
|
if not path.exists() or not path.is_file():
|
||||||
raise HTTPException(404, "קובץ לא נמצא")
|
raise HTTPException(404, "קובץ לא נמצא")
|
||||||
return FileResponse(path, media_type="text/plain; charset=utf-8", filename=filename)
|
return await serve_blob(path, media_type="text/plain; charset=utf-8", filename=filename)
|
||||||
|
|
||||||
|
|
||||||
# ── Research analysis (analysis-and-research.md) — parse + edit ────
|
# ── Research analysis (analysis-and-research.md) — parse + edit ────
|
||||||
@@ -2812,7 +2845,7 @@ async def api_research_analysis_download(case_number: str):
|
|||||||
path = _research_file_path(case_number)
|
path = _research_file_path(case_number)
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
raise HTTPException(404, "טרם בוצע ניתוח משפטי לתיק זה")
|
raise HTTPException(404, "טרם בוצע ניתוח משפטי לתיק זה")
|
||||||
return FileResponse(
|
return await serve_blob(
|
||||||
path,
|
path,
|
||||||
media_type="text/markdown; charset=utf-8",
|
media_type="text/markdown; charset=utf-8",
|
||||||
filename=f"analysis-{case_number}.md",
|
filename=f"analysis-{case_number}.md",
|
||||||
@@ -2833,7 +2866,7 @@ async def api_research_analysis_export_docx(case_number: str):
|
|||||||
case_dir = config.find_case_dir(case_number)
|
case_dir = config.find_case_dir(case_number)
|
||||||
if case_dir.exists():
|
if case_dir.exists():
|
||||||
commit_and_push(case_dir, f"ניתוח משפטי: {path.name}")
|
commit_and_push(case_dir, f"ניתוח משפטי: {path.name}")
|
||||||
return FileResponse(
|
return await serve_blob(
|
||||||
path,
|
path,
|
||||||
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
filename=path.name,
|
filename=path.name,
|
||||||
@@ -3095,7 +3128,7 @@ async def api_download_export(case_number: str, filename: str):
|
|||||||
path = export_dir / filename
|
path = export_dir / filename
|
||||||
if not path.exists() or not path.parent.samefile(export_dir):
|
if not path.exists() or not path.parent.samefile(export_dir):
|
||||||
raise HTTPException(404, "קובץ לא נמצא")
|
raise HTTPException(404, "קובץ לא נמצא")
|
||||||
return FileResponse(
|
return await serve_blob(
|
||||||
path,
|
path,
|
||||||
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
filename=filename,
|
filename=filename,
|
||||||
|
|||||||
80
web/tests/test_serve_blob.py
Normal file
80
web/tests/test_serve_blob.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""Tests for #106.5 — web.app.serve_blob backend routing (INV-STG6).
|
||||||
|
|
||||||
|
Verifies the tri-model-panel cutover-safety design (2026-06-11):
|
||||||
|
- filesystem → FileResponse from disk (behaviour-preserving, current prod).
|
||||||
|
- s3/dual → 302 redirect to a presigned URL when the object is in MinIO.
|
||||||
|
- dual + miss → disk fallback (covers files outside the DB-tracked migration set
|
||||||
|
— e.g. dynamically-built analysis DOCX / research markdown).
|
||||||
|
- s3 + miss + no disk → 404.
|
||||||
|
|
||||||
|
Importing web/app.py needs a few env vars (it wires the Paperclip pool at import);
|
||||||
|
they're set before import. Skips cleanly if the heavy import can't be satisfied.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
os.environ.setdefault("PAPERCLIP_DB_URL", "postgres://x:x@127.0.0.1:54329/paperclip")
|
||||||
|
os.environ.setdefault("DATA_DIR", "/home/chaim/legal-ai/data")
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1])) # web/
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "mcp-server" / "src")) # legal_mcp
|
||||||
|
|
||||||
|
app = pytest.importorskip("app", reason="web/app.py import prerequisites unavailable")
|
||||||
|
from fastapi.responses import FileResponse, RedirectResponse # noqa: E402
|
||||||
|
from legal_mcp.services import storage # noqa: E402
|
||||||
|
|
||||||
|
DATA_DIR = Path(os.environ["DATA_DIR"])
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeBackend:
|
||||||
|
def __init__(self, name: str, has: bool) -> None:
|
||||||
|
self.name, self._has = name, has
|
||||||
|
|
||||||
|
async def exists(self, key, *, bucket) -> bool: # noqa: ANN001
|
||||||
|
return self._has
|
||||||
|
|
||||||
|
async def presign_get(self, key, *, bucket, download_name=None) -> str: # noqa: ANN001
|
||||||
|
return f"https://s3.example/{key}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def blob(tmp_path_factory, monkeypatch):
|
||||||
|
# a real file UNDER DATA_DIR so storage.normalize_key accepts it
|
||||||
|
d = DATA_DIR / "audit"
|
||||||
|
d.mkdir(parents=True, exist_ok=True)
|
||||||
|
f = d / "_serveblob_pytest.txt"
|
||||||
|
f.write_text("hi")
|
||||||
|
yield f
|
||||||
|
f.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _serve(monkeypatch, name, has, path):
|
||||||
|
monkeypatch.setattr(storage, "get_storage", lambda: _FakeBackend(name, has))
|
||||||
|
return asyncio.new_event_loop().run_until_complete(
|
||||||
|
app.serve_blob(str(path), media_type="text/plain", filename="x.txt"))
|
||||||
|
|
||||||
|
|
||||||
|
def test_filesystem_serves_from_disk(blob, monkeypatch):
|
||||||
|
assert isinstance(_serve(monkeypatch, "filesystem", False, blob), FileResponse)
|
||||||
|
|
||||||
|
|
||||||
|
def test_dual_in_s3_redirects_presigned(blob, monkeypatch):
|
||||||
|
assert isinstance(_serve(monkeypatch, "dual", True, blob), RedirectResponse)
|
||||||
|
|
||||||
|
|
||||||
|
def test_dual_missing_falls_back_to_disk(blob, monkeypatch):
|
||||||
|
# the panel's safety net: a file not yet in MinIO is still served from disk
|
||||||
|
assert isinstance(_serve(monkeypatch, "dual", False, blob), FileResponse)
|
||||||
|
|
||||||
|
|
||||||
|
def test_s3_missing_no_disk_404(monkeypatch):
|
||||||
|
from fastapi import HTTPException
|
||||||
|
with pytest.raises(HTTPException) as ei:
|
||||||
|
_serve(monkeypatch, "s3", False, DATA_DIR / "audit" / "_nope.txt")
|
||||||
|
assert ei.value.status_code == 404
|
||||||
Reference in New Issue
Block a user