feat(storage): X14 Phase 2c — route remaining sync write-sites through storage.py
Completes the write-side rewiring (INV-STG1) for the call-sites that run in synchronous contexts, via a new blocking facade in storage.py (put_bytes_sync / put_file_sync — asyncio.run, or a worker thread when a loop is already running):

- services/extractor.py: multimodal thumbnail JPEGs → DERIVED (rendered in a to_thread worker)
- services/docx_reviser.py: track-changes save (_save_docx_xml) + empty-diff copy (copy_with_revisions) → DOCUMENTS
- services/docx_retrofit.py: in-place retrofit backup → DOCUMENTS

Each site keeps a fallback to a direct disk write when the target path is outside DATA_DIR (caller-provided). Under the default STORAGE_BACKEND=filesystem the bytes land exactly where they did before — zero behaviour change.

Also: mcp_env_catalog MINIO_ENDPOINT default updated to the durable container-name endpoint (http://minio-bx2ykvw94xbutsex41hz4vv8:9000), matching the Coolify "Connect to Predefined Network" change made for network durability.

All binary write-sites now flow through storage.py. git-tracked text (case.json/notes/research-md/draft-md) stays on disk by design (INV-STG7); court-fetch temp files are ephemeral.

tests: +2 (thumbnail renderer routes through storage; put_bytes_sync round-trip); 55 storage/docx/track-changes green; 244 collected, no import breakage.

Keeps G2; completes INV-STG1 write coverage. Spec: docs/spec/X14-storage-minio.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -46,3 +46,32 @@ def test_stage_file_default_subdir(_tmp_datadir):
|
||||
dest = run(ingest._stage_file(src, _tmp_datadir / "digests", ""))
|
||||
assert dest.parent == _tmp_datadir / "digests" / "other"
|
||||
assert dest.exists()
|
||||
|
||||
|
||||
def test_thumbnail_renderer_routes_through_storage(_tmp_datadir):
    """Check that the sync multimodal renderer writes its thumbnail to disk.

    ``extractor.render_pages_for_multimodal`` is a synchronous renderer that
    persists each JPEG thumbnail via the sync storage facade. With the
    filesystem backend the file must appear inside the requested
    ``thumbnail_dir``.

    The test is skipped when ``fitz`` cannot be imported.
    """
    fitz = pytest.importorskip("fitz")
    from legal_mcp.services import extractor

    # Build a one-page blank PDF to feed the renderer.
    pdf_path = _tmp_datadir / "doc.pdf"
    blank_doc = fitz.open()
    blank_doc.new_page(width=200, height=200)
    blank_doc.save(str(pdf_path))
    blank_doc.close()

    thumb_dir = _tmp_datadir.joinpath("cases", "1", "thumbnails", "docid")
    rendered = extractor.render_pages_for_multimodal(
        pdf_path,
        embed_dpi=72,
        thumb_dpi=36,
        thumbnail_dir=thumb_dir,
    )

    # One page in, one (image, thumbnail path) pair out.
    assert len(rendered) == 1
    _img, thumb_path = rendered[0]
    assert thumb_path == thumb_dir / "p001.jpg"
    # Written through storage.put_bytes_sync (DERIVED).
    assert thumb_path.exists()
    # JPEG magic: the file must start with the SOI marker.
    assert thumb_path.read_bytes().startswith(b"\xff\xd8")
|
||||
|
||||
|
||||
def test_put_bytes_sync_roundtrip(_tmp_datadir):
    """Write bytes via ``storage.put_bytes_sync`` and read them back from disk.

    The payload is stored under a DOCUMENTS-bucket key and must be readable,
    unchanged, at ``_tmp_datadir / key``.
    """
    # NOTE(review): presumably the _tmp_datadir fixture points the filesystem
    # backend's data directory at a temp dir — confirm against the fixture.
    key = "cases/1/exports/x.docx"
    payload = b"PK\x03\x04zip"

    storage.put_bytes_sync(key, payload, bucket=storage.Bucket.DOCUMENTS)

    stored = (_tmp_datadir / key).read_bytes()
    assert stored == payload
|
||||
|
||||
Reference in New Issue
Block a user