feat(storage): X14 Phase 2c — route remaining sync write-sites through storage.py
Completes the write-side rewiring (INV-STG1) for the call-sites that run in synchronous contexts, via a new blocking facade in storage.py (put_bytes_sync / put_file_sync — implemented with asyncio.run, or a worker thread when a loop is already running):

- services/extractor.py: multimodal thumbnail JPEGs → DERIVED (rendered in a to_thread worker)
- services/docx_reviser.py: track-changes save (_save_docx_xml) + empty-diff copy (copy_with_revisions) → DOCUMENTS
- services/docx_retrofit.py: in-place retrofit backup → DOCUMENTS

Each site keeps a fallback to a direct disk write when the target path is outside DATA_DIR (caller-provided). Under the default STORAGE_BACKEND=filesystem the bytes land exactly where they did before — zero behaviour change.

Also: mcp_env_catalog MINIO_ENDPOINT default updated to the durable container-name endpoint (http://minio-bx2ykvw94xbutsex41hz4vv8:9000), matching the Coolify "Connect to Predefined Network" change made for network durability.

All binary write-sites now flow through storage.py. Git-tracked text (case.json/notes/research-md/draft-md) stays on disk by design (INV-STG7); court-fetch temp files are ephemeral.

tests: +2 (thumbnail renderer routes through storage; put_bytes_sync round-trip); 55 storage/docx/track-changes green; 244 collected, no import breakage.

Keeps G2; completes INV-STG1 write coverage. Spec: docs/spec/X14-storage-minio.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,9 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import storage
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
@@ -98,6 +101,22 @@ def _load_docx_xml(docx_path: Path) -> tuple[dict[str, bytes], etree._Element, e
|
||||
return members, document_tree, settings_tree
|
||||
|
||||
|
||||
# MIME content type for .docx files; passed as content_type to the storage layer.
_DOCX_CTYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
|
||||
|
||||
def _persist_docx_sync(output_path: Path, data: bytes) -> None:
    """Persist DOCX bytes through the storage layer (INV-STG1).

    When ``output_path`` resolves to a location under ``config.DATA_DIR``, its
    path relative to DATA_DIR (in POSIX form) is used as the storage key and
    the bytes are written with ``storage.put_bytes_sync`` into the DOCUMENTS
    bucket. Otherwise (a caller-provided path outside DATA_DIR) the bytes are
    written directly to disk, creating parent directories as needed.

    Args:
        output_path: Destination path of the DOCX file.
        data: Complete DOCX archive bytes to persist.
    """
    out = Path(output_path)
    try:
        # relative_to() raises ValueError when ``out`` is not under DATA_DIR.
        key = out.resolve().relative_to(Path(config.DATA_DIR).resolve()).as_posix()
    except ValueError:
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_bytes(data)
        return
    # Deliberately outside the try: a ValueError raised by the storage layer
    # must propagate rather than silently trigger the direct-disk fallback.
    storage.put_bytes_sync(
        key, data, bucket=storage.Bucket.DOCUMENTS, content_type=_DOCX_CTYPE
    )
|
||||
|
||||
|
||||
def _save_docx_xml(
|
||||
members: dict[str, bytes],
|
||||
document_tree: etree._Element,
|
||||
@@ -113,12 +132,11 @@ def _save_docx_xml(
|
||||
settings_tree, xml_declaration=True, encoding="UTF-8", standalone=True
|
||||
)
|
||||
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
buffer = BytesIO()
|
||||
with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
for name, data in members.items():
|
||||
zf.writestr(name, data)
|
||||
output_path.write_bytes(buffer.getvalue())
|
||||
_persist_docx_sync(output_path, buffer.getvalue())
|
||||
|
||||
|
||||
def _ensure_track_revisions(settings_tree: etree._Element) -> None:
|
||||
@@ -511,4 +529,11 @@ def copy_with_revisions(
|
||||
source_path: str | Path, output_path: str | Path,
|
||||
) -> None:
|
||||
"""Copy source → output unchanged (used when revisions list is empty)."""
|
||||
shutil.copy2(str(source_path), str(output_path))
|
||||
out = Path(output_path)
|
||||
try:
|
||||
key = out.resolve().relative_to(Path(config.DATA_DIR).resolve()).as_posix()
|
||||
storage.put_file_sync(source_path, key, bucket=storage.Bucket.DOCUMENTS,
|
||||
content_type=_DOCX_CTYPE)
|
||||
except ValueError:
|
||||
out.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(str(source_path), str(out))
|
||||
|
||||
Reference in New Issue
Block a user