feat(storage): X14 Phase 2a — route source-document writes through storage.py
Rewire the source-document staging writes onto the unified storage layer
(INV-STG1), replacing direct shutil.copy2 calls:

- tools/documents.py: case originals + training-corpus uploads
- services/ingest.py: _stage_file (now async) — covers precedent-library,
  internal-decisions, and digests (the canonical intake helper)
- services/digest_library.py: awaits the now-async _stage_file

Each write goes through storage.put_file(..., bucket=DOCUMENTS) with the
DATA_DIR-relative key; the Hebrew original filename rides as object metadata
(INV-STG2), content-type is guessed from the extension. DB path columns are
unchanged (still the absolute dest) — object_key backfill is Phase 3.

Under the default STORAGE_BACKEND=filesystem the bytes land at the exact
legacy on-disk location (put_file → shutil.copy2 to DATA_DIR/key), so this is
zero behaviour change in prod. shutil import dropped where now unused.

tests: +2 staging regression tests (file lands under DATA_DIR at the legacy
path); 20 storage + 22 ingest tests green; 242 collected with no import
breakage.

Derived/export write sites (thumbnails, extracted text, DOCX exports) are
Phase 2b. Keeps G2; advances INV-STG1.

Spec: docs/spec/X14-storage-minio.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
48
mcp-server/tests/test_storage_staging.py
Normal file
48
mcp-server/tests/test_storage_staging.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Regression tests for the write call-sites rewired onto storage.py (X14
Phase 2). They assert the rewired staging lands bytes at the exact legacy
on-disk location under the default filesystem backend — i.e. zero behaviour
change.
"""
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import ingest, storage
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _tmp_datadir(tmp_path, monkeypatch):
    """Run every test against a throwaway DATA_DIR on the filesystem backend.

    Patches ``config.DATA_DIR`` to pytest's ``tmp_path`` and
    ``config.STORAGE_BACKEND`` to ``"filesystem"``, then yields the temporary
    directory so tests can build paths underneath it.
    """
    monkeypatch.setattr(config, "DATA_DIR", tmp_path)
    monkeypatch.setattr(config, "STORAGE_BACKEND", "filesystem")
    # Reset on both sides of the test; presumably this discards a storage
    # object built from different settings — confirm against
    # storage.reset_storage_cache.
    storage.reset_storage_cache()
    yield tmp_path
    storage.reset_storage_cache()
|
||||
|
||||
|
||||
def run(coro):
    """Drive *coro* to completion with ``asyncio.run`` and return its result.

    Lets the synchronous tests in this module call async helpers.
    """
    return asyncio.run(coro)
|
||||
|
||||
|
||||
def test_stage_file_lands_under_datadir(_tmp_datadir):
    """Staging with an explicit subdir puts the bytes at ``<root>/<subdir>``."""
    payload = b"%PDF-1.4 ..."
    src = _tmp_datadir / "src" / "כתב ערר.pdf"
    src.parent.mkdir(parents=True)
    src.write_bytes(payload)
    root = _tmp_datadir / "precedent-library"

    dest = run(ingest._stage_file(src, root, "court_ruling"))

    # dest is under the staging subdir, prefixed with a uuid, original suffix kept
    assert dest.parent == root / "court_ruling"
    assert dest.exists()
    assert dest.read_bytes() == payload
    assert dest.suffix == ".pdf"
    # and the key is DATA_DIR-relative (what the DB column will store)
    assert dest.relative_to(_tmp_datadir).as_posix().startswith("precedent-library/court_ruling/")
|
||||
|
||||
|
||||
def test_stage_file_default_subdir(_tmp_datadir):
    """An empty subdir argument stages the file under ``<root>/other``."""
    payload = b"doc"
    src = _tmp_datadir / "x.docx"
    src.write_bytes(payload)

    dest = run(ingest._stage_file(src, _tmp_datadir / "digests", ""))

    assert dest.parent == _tmp_datadir / "digests" / "other"
    assert dest.exists()
    # Same content/suffix checks as the explicit-subdir test, so the default
    # path is held to the same "bytes land intact" standard.
    assert dest.read_bytes() == payload
    assert dest.suffix == ".docx"
|
||||
Reference in New Issue
Block a user