diff --git a/web/app.py b/web/app.py index e31f2fd..d85cce0 100644 --- a/web/app.py +++ b/web/app.py @@ -23,7 +23,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / " import zipfile from fastapi import BackgroundTasks, Body, FastAPI, File, Form, HTTPException, UploadFile -from fastapi.responses import FileResponse, StreamingResponse +from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse from typing import Any, Literal from pydantic import BaseModel @@ -31,7 +31,7 @@ import asyncpg import httpx from legal_mcp import config -from legal_mcp.services import chunker, db, embeddings, extractor, git_sync, metrics as metrics_service, processor, proofreader, research_md +from legal_mcp.services import chunker, db, embeddings, extractor, git_sync, metrics as metrics_service, processor, proofreader, research_md, storage from legal_mcp.tools import cases as cases_tools, search as search_tools, workflow as workflow_tools, drafting as drafting_tools, precedents as precedents_tools from legal_mcp.tools.envelope import envelope_unwrap @@ -2769,6 +2769,39 @@ async def api_local_files(case_number: str): return result +async def serve_blob( + path, *, media_type: str, filename: str, + bucket=storage.Bucket.DOCUMENTS, +): + """Serve a case blob, routing through the storage layer (#106.5, INV-STG6). + + - ``filesystem`` backend → stream the file from disk (unchanged behaviour). + - ``s3``/``dual`` backend → if the object is in MinIO, 302-redirect to a + short-lived presigned URL (bytes go browser↔MinIO, never through FastAPI); + under ``dual`` a not-yet-migrated file (e.g. the dynamically-built analysis + DOCX or research markdown) transparently falls back to its disk copy. This + is the tri-model panel's agreed cutover-safety design (2026-06-11): dual + + disk-fallback covers files outside the DB-tracked migration set. + + The caller still owns its own existence/path-safety checks; this only + decides HOW to serve a path it already validated. + """ + from pathlib import Path as _Path + backend = storage.get_storage() + if backend.name != "filesystem": + try: + key = storage.normalize_key(path) + if await backend.exists(key, bucket=bucket): + url = await backend.presign_get(key, bucket=bucket, download_name=filename) + return RedirectResponse(url, status_code=302) + except Exception: # noqa: BLE001 — never 500 on a key/presign hiccup; fall back to disk + logger.exception("serve_blob: presign failed for %s; falling back to disk", path) + # not in object storage (dual fallback) — serve the disk copy if present + if not _Path(path).exists(): + raise HTTPException(404, "קובץ לא נמצא") + return FileResponse(path, media_type=media_type, filename=filename) + + @app.get("/api/cases/{case_number}/local-files/{folder}/{filename}") async def api_read_local_file(case_number: str, folder: str, filename: str): """Read contents of a local case file.""" @@ -2781,7 +2814,7 @@ async def api_read_local_file(case_number: str, folder: str, filename: str): path = case_dir / "documents" / folder / filename if not path.exists() or not path.is_file(): raise HTTPException(404, "קובץ לא נמצא") - return FileResponse(path, media_type="text/plain; charset=utf-8", filename=filename) + return await serve_blob(path, media_type="text/plain; charset=utf-8", filename=filename) # ── Research analysis (analysis-and-research.md) — parse + edit ──── @@ -2812,7 +2845,7 @@ async def api_research_analysis_download(case_number: str): path = _research_file_path(case_number) if not path.exists(): raise HTTPException(404, "טרם בוצע ניתוח משפטי לתיק זה") - return FileResponse( + return await serve_blob( path, media_type="text/markdown; charset=utf-8", filename=f"analysis-{case_number}.md", @@ -2833,7 +2866,7 @@ async def api_research_analysis_export_docx(case_number: str): case_dir = config.find_case_dir(case_number) if case_dir.exists(): commit_and_push(case_dir, f"ניתוח משפטי: {path.name}") - return FileResponse( + return await serve_blob( path, media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", filename=path.name, @@ -3095,7 +3128,7 @@ async def api_download_export(case_number: str, filename: str): path = export_dir / filename if not path.exists() or not path.parent.samefile(export_dir): raise HTTPException(404, "קובץ לא נמצא") - return FileResponse( + return await serve_blob( path, media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", filename=filename, diff --git a/web/tests/test_serve_blob.py b/web/tests/test_serve_blob.py new file mode 100644 index 0000000..6ec8373 --- /dev/null +++ b/web/tests/test_serve_blob.py @@ -0,0 +1,80 @@ +"""Tests for #106.5 — web.app.serve_blob backend routing (INV-STG6). + +Verifies the tri-model-panel cutover-safety design (2026-06-11): +- filesystem → FileResponse from disk (behaviour-preserving, current prod). +- s3/dual → 302 redirect to a presigned URL when the object is in MinIO. +- dual + miss → disk fallback (covers files outside the DB-tracked migration set + — e.g. dynamically-built analysis DOCX / research markdown). +- s3 + miss + no disk → 404. + +Importing web/app.py needs a few env vars (it wires the Paperclip pool at import); +they're set before import. Skips cleanly if the heavy import can't be satisfied. +""" + +from __future__ import annotations + +import asyncio +import os +import sys +from pathlib import Path + +import pytest + +os.environ.setdefault("PAPERCLIP_DB_URL", "postgres://x:x@127.0.0.1:54329/paperclip") +os.environ.setdefault("DATA_DIR", "/home/chaim/legal-ai/data") +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) # web/ +sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "mcp-server" / "src")) # legal_mcp + +app = pytest.importorskip("app", reason="web/app.py import prerequisites unavailable") +from fastapi.responses import FileResponse, RedirectResponse # noqa: E402 +from legal_mcp.services import storage # noqa: E402 + +DATA_DIR = Path(os.environ["DATA_DIR"]) + + +class _FakeBackend: + def __init__(self, name: str, has: bool) -> None: + self.name, self._has = name, has + + async def exists(self, key, *, bucket) -> bool: # noqa: ANN001 + return self._has + + async def presign_get(self, key, *, bucket, download_name=None) -> str: # noqa: ANN001 + return f"https://s3.example/{key}" + + +@pytest.fixture() +def blob(tmp_path_factory, monkeypatch): + # a real file UNDER DATA_DIR so storage.normalize_key accepts it + d = DATA_DIR / "audit" + d.mkdir(parents=True, exist_ok=True) + f = d / "_serveblob_pytest.txt" + f.write_text("hi") + yield f + f.unlink(missing_ok=True) + + +def _serve(monkeypatch, name, has, path): + monkeypatch.setattr(storage, "get_storage", lambda: _FakeBackend(name, has)) + return asyncio.new_event_loop().run_until_complete( + app.serve_blob(str(path), media_type="text/plain", filename="x.txt")) + + +def test_filesystem_serves_from_disk(blob, monkeypatch): + assert isinstance(_serve(monkeypatch, "filesystem", False, blob), FileResponse) + + +def test_dual_in_s3_redirects_presigned(blob, monkeypatch): + assert isinstance(_serve(monkeypatch, "dual", True, blob), RedirectResponse) + + +def test_dual_missing_falls_back_to_disk(blob, monkeypatch): + # the panel's safety net: a file not yet in MinIO is still served from disk + assert isinstance(_serve(monkeypatch, "dual", False, blob), FileResponse) + + +def test_s3_missing_no_disk_404(monkeypatch): + from fastapi import HTTPException + with pytest.raises(HTTPException) as ei: + _serve(monkeypatch, "s3", False, DATA_DIR / "audit" / "_nope.txt") + assert ei.value.status_code == 404