Files
legal-ai/web/mcp_env_catalog.py
Chaim b2ea0c28dd feat(storage): X14 Phase 2c — route remaining sync write-sites through storage.py
Completes the write-side rewiring (INV-STG1) for the call-sites that run in
synchronous contexts, via a new blocking facade in storage.py
(put_bytes_sync / put_file_sync — asyncio.run, or a worker thread when a loop
is already running):
- services/extractor.py: multimodal thumbnail JPEGs → DERIVED (rendered in a
  to_thread worker)
- services/docx_reviser.py: track-changes save (_save_docx_xml) + empty-diff
  copy (copy_with_revisions) → DOCUMENTS
- services/docx_retrofit.py: in-place retrofit backup → DOCUMENTS

Each site keeps a fallback to a direct disk write when the target path is
outside DATA_DIR (caller-provided). Under the default STORAGE_BACKEND=
filesystem the bytes land exactly where they did before — zero behaviour
change.

Also: mcp_env_catalog MINIO_ENDPOINT default updated to the durable
container-name endpoint (http://minio-bx2ykvw94xbutsex41hz4vv8:9000), matching
the Coolify "Connect to Predefined Network" change made for network durability.

All binary write-sites now flow through storage.py. git-tracked text
(case.json/notes/research-md/draft-md) stays on disk by design (INV-STG7);
court-fetch temp files are ephemeral.

tests: +2 (thumbnail renderer routes through storage; put_bytes_sync
round-trip); 55 storage/docx/track-changes green; 244 collected, no import
breakage.

Keeps G2; completes INV-STG1 write coverage. Spec: docs/spec/X14-storage-minio.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 08:26:09 +00:00

260 lines
10 KiB
Python

# web/mcp_env_catalog.py
"""Static catalog of MCP server env vars exposed in the settings UI.
Source of truth: Coolify env vars (read/write via Coolify API).
This file defines the whitelist + types + display metadata.
Keys not in this catalog are not displayed or editable.
"""
from __future__ import annotations

import math
from dataclasses import dataclass, asdict
from typing import Any, Literal
# Value types a catalog entry may declare; coerce() branches on these names.
EnvType = Literal[
    "bool",
    "int",
    "float",
    "string",
]

# Category label attached to every catalog entry.
EnvCategory = Literal[
    "multimodal",
    "rerank",
    "halacha",
    "credentials",
    "connection",
    "storage",
    "general",
]
@dataclass(frozen=True)
class EnvSpec:
    """Immutable description of one whitelisted environment variable.

    Instances are frozen, so a spec cannot be modified after construction.
    The numeric bounds and ``enum_values`` are the constraints that
    ``coerce`` checks when validating a raw value against the spec.
    """

    # Name of the environment variable.
    key: str
    # Category label the variable belongs to.
    category: EnvCategory
    # Declared value type ("bool", "int", "float" or "string").
    type: EnvType
    # Human-readable description of the variable.
    description: str
    # Whether the value is a secret (see mask_secret for masking).
    is_secret: bool
    # Whether the value is marked as editable.
    is_editable: bool
    # Default value, or None when the spec declares none.
    default: Any = None
    # Inclusive lower bound for numeric types; None means unbounded.
    min: float | None = None
    # Inclusive upper bound for numeric types; None means unbounded.
    max: float | None = None
    # Allowed values for string types; None means unrestricted.
    enum_values: tuple[str, ...] | None = None

    def to_public_dict(self) -> dict[str, Any]:
        """Return every spec field as a plain dict.

        Only the spec metadata is returned; no secret masking is applied
        here, so the caller is responsible for masking secret values.
        """
        public_fields: dict[str, Any] = asdict(self)
        return public_fields
# Whitelist of environment variables, keyed by variable name.
# The mapping is derived from each spec's own ``key`` so the dict key and
# the spec can never disagree; insertion order follows the tuple below.
ENV_CATALOG: dict[str, EnvSpec] = {
    spec.key: spec
    for spec in (
        # ── multimodal ─────────────────────────────────────────────
        EnvSpec(
            key="MULTIMODAL_ENABLED", category="multimodal", type="bool",
            description="הפעלת page-image embeddings (voyage-multimodal-3)",
            is_secret=False, is_editable=True, default=False,
        ),
        EnvSpec(
            key="MULTIMODAL_MODEL", category="multimodal", type="string",
            description="מודל multimodal של Voyage",
            is_secret=False, is_editable=True, default="voyage-multimodal-3",
        ),
        EnvSpec(
            key="MULTIMODAL_DPI", category="multimodal", type="int",
            description="DPI ל-rendering של עמוד למודל",
            is_secret=False, is_editable=True, default=144, min=72, max=300,
        ),
        EnvSpec(
            key="MULTIMODAL_THUMB_DPI", category="multimodal", type="int",
            description="DPI ל-thumbnail בתצוגה",
            is_secret=False, is_editable=True, default=96, min=72, max=200,
        ),
        EnvSpec(
            key="MULTIMODAL_TEXT_WEIGHT", category="multimodal", type="float",
            description="משקל text vs image ב-RRF (0=image בלבד, 1=text בלבד)",
            is_secret=False, is_editable=True, default=0.5, min=0.0, max=1.0,
        ),
        EnvSpec(
            key="MULTIMODAL_RRF_K", category="multimodal", type="int",
            description="RRF damping constant",
            is_secret=False, is_editable=True, default=60, min=1, max=200,
        ),
        # ── rerank ─────────────────────────────────────────────────
        EnvSpec(
            key="VOYAGE_RERANK_ENABLED", category="rerank", type="bool",
            description="הפעלת cross-encoder rerank",
            is_secret=False, is_editable=True, default=False,
        ),
        EnvSpec(
            key="VOYAGE_RERANK_MODEL", category="rerank", type="string",
            description="מודל rerank",
            is_secret=False, is_editable=True, default="rerank-2",
        ),
        EnvSpec(
            key="VOYAGE_RERANK_FETCH_K", category="rerank", type="int",
            description="מספר candidates לפני rerank",
            is_secret=False, is_editable=True, default=50, min=10, max=200,
        ),
        # ── halacha ────────────────────────────────────────────────
        EnvSpec(
            key="HALACHA_AUTO_APPROVE_THRESHOLD", category="halacha", type="float",
            description="סף confidence ל-auto-approve של הלכות שחולצו",
            is_secret=False, is_editable=True, default=0.80, min=0.0, max=1.0,
        ),
        # ── storage (X14 / MinIO) ──────────────────────────────────
        EnvSpec(
            key="STORAGE_BACKEND", category="storage", type="string",
            description="מנוע אחסון: filesystem (דיסק) / dual (דיסק+S3) / s3 (MinIO בלבד)",
            is_secret=False, is_editable=True, default="filesystem",
            enum_values=("filesystem", "dual", "s3"),
        ),
        EnvSpec(
            key="MINIO_ENDPOINT", category="storage", type="string",
            description="endpoint פנימי של MinIO (server-side, רשת Docker coolify — שם-קונטיינר עמיד)",
            is_secret=False, is_editable=False,
            default="http://minio-bx2ykvw94xbutsex41hz4vv8:9000",
        ),
        EnvSpec(
            key="MINIO_PUBLIC_ENDPOINT", category="storage", type="string",
            description="endpoint ציבורי ל-presigned URLs (גישת דפדפן)",
            is_secret=False, is_editable=False,
            default="https://s3.nautilus.marcusgroup.org",
        ),
        EnvSpec(
            key="MINIO_ACCESS_KEY", category="storage", type="string",
            description="MinIO access key (service-account מוגבל ל-3 הדליות)",
            is_secret=True, is_editable=False,
        ),
        EnvSpec(
            key="MINIO_SECRET_KEY", category="storage", type="string",
            description="MinIO secret key",
            is_secret=True, is_editable=False,
        ),
        EnvSpec(
            key="MINIO_REGION", category="storage", type="string",
            description="אזור S3 (MinIO מתעלם — לחתימת SigV4)",
            is_secret=False, is_editable=False, default="us-east-1",
        ),
        EnvSpec(
            key="MINIO_BUCKET_DOCUMENTS", category="storage", type="string",
            description="דלי מסמכי-מקור (versioning)",
            is_secret=False, is_editable=False, default="legal-documents",
        ),
        EnvSpec(
            key="MINIO_BUCKET_IMMUTABLE", category="storage", type="string",
            description="דלי החלטות סופיות (versioning + Object-Lock COMPLIANCE)",
            is_secret=False, is_editable=False, default="legal-immutable",
        ),
        EnvSpec(
            key="MINIO_BUCKET_DERIVED", category="storage", type="string",
            description="דלי נגזרים (thumbnails / extracted — ניתן-לשחזור)",
            is_secret=False, is_editable=False, default="legal-derived",
        ),
        EnvSpec(
            key="MINIO_PRESIGN_TTL", category="storage", type="int",
            description="תוקף presigned URL בשניות (מקס' SigV4 = 7 ימים)",
            is_secret=False, is_editable=True, default=900, min=60, max=604800,
        ),
        # ── general ────────────────────────────────────────────────
        EnvSpec(
            key="VOYAGE_MODEL", category="general", type="string",
            description="מודל embedding ראשי",
            is_secret=False, is_editable=True, default="voyage-law-2",
        ),
        EnvSpec(
            key="AUDIT_ENABLED", category="general", type="bool",
            description="הפעלת audit log",
            is_secret=False, is_editable=True, default=True,
        ),
        # ── credentials (read-only, masked) ────────────────────────
        EnvSpec(
            key="VOYAGE_API_KEY", category="credentials", type="string",
            description="Voyage AI API key",
            is_secret=True, is_editable=False,
        ),
        EnvSpec(
            key="GOOGLE_CLOUD_VISION_API_KEY", category="credentials", type="string",
            description="Google Cloud Vision API key (OCR)",
            is_secret=True, is_editable=False,
        ),
        EnvSpec(
            key="INFISICAL_TOKEN", category="credentials", type="string",
            description="Infisical SDK token",
            is_secret=True, is_editable=False,
        ),
        # ── connection (read-only — changing these at runtime is dangerous) ──
        EnvSpec(
            key="POSTGRES_URL", category="connection", type="string",
            description="PostgreSQL connection URL",
            is_secret=True, is_editable=False,
        ),
        # NOTE(review): Redis URLs can embed a password; confirm that
        # is_secret=False is intended here (POSTGRES_URL is marked secret).
        EnvSpec(
            key="REDIS_URL", category="connection", type="string",
            description="Redis connection URL",
            is_secret=False, is_editable=False,
        ),
        EnvSpec(
            key="DATA_DIR", category="connection", type="string",
            description="Data directory path",
            is_secret=False, is_editable=False,
        ),
    )
}
# ── helpers ────────────────────────────────────────────────────────
def mask_secret(value: str | None) -> str:
    """Return a display-safe rendering of a secret value.

    ``None`` yields an empty string. Any other value yields ``"****"``,
    followed by the final four characters only when the value is longer
    than four characters.
    """
    if value is None:
        return ""
    stars = "****"
    # Values of four characters or fewer expose nothing at all.
    visible_tail = value[-4:] if len(value) > 4 else ""
    return stars + visible_tail
def _check_range(spec: EnvSpec, value: float) -> None:
    """Raise ValueError if *value* lies outside the spec's inclusive min/max."""
    if spec.min is not None and value < spec.min:
        raise ValueError(f"ערך {value} מתחת למינימום {spec.min}")
    if spec.max is not None and value > spec.max:
        raise ValueError(f"ערך {value} מעל המקסימום {spec.max}")


def coerce(spec: EnvSpec, raw: Any) -> Any:
    """Coerce raw input (e.g. a str or JSON scalar) to the spec's typed value.

    Args:
        spec: Catalog entry whose ``type``, ``min``, ``max`` and
            ``enum_values`` define what counts as valid.
        raw: The untyped input value.

    Returns:
        A ``bool``, ``int``, ``float`` or ``str`` according to ``spec.type``.

    Raises:
        ValueError: If ``raw`` is ``None`` or empty, cannot be parsed as the
            declared type, is a non-finite float (NaN / infinity), falls
            outside the min/max bounds, or is not one of ``enum_values``.
    """
    if raw is None or raw == "":
        raise ValueError("ערך ריק")

    if spec.type == "bool":
        if isinstance(raw, bool):
            return raw
        text = str(raw).strip().lower()
        if text in ("true", "1", "yes", "on"):
            return True
        if text in ("false", "0", "no", "off"):
            return False
        raise ValueError(f"ערך bool לא חוקי: {raw}")

    if spec.type == "int":
        # Reject 1.5 (and NaN/inf, whose is_integer() is False) instead of
        # letting int() silently truncate.
        if isinstance(raw, float) and not raw.is_integer():
            raise ValueError(f"ערך int לא חוקי (שבר עשרוני): {raw}")
        try:
            number = int(raw)
        except (TypeError, ValueError) as exc:
            raise ValueError(f"ערך int לא חוקי: {raw}") from exc
        _check_range(spec, number)
        return number

    if spec.type == "float":
        try:
            number = float(raw)
        except (TypeError, ValueError, OverflowError) as exc:
            # OverflowError: an int too large to be represented as a float.
            raise ValueError(f"ערך float לא חוקי: {raw}") from exc
        # NaN compares False against every bound, so it would slip through
        # the range check; infinity would pass whenever a bound is missing.
        if not math.isfinite(number):
            raise ValueError(f"ערך float לא חוקי: {raw}")
        _check_range(spec, number)
        return number

    # string
    text = str(raw)
    if spec.enum_values and text not in spec.enum_values:
        raise ValueError(f"ערך לא ברשימה: {spec.enum_values}")
    return text
def normalize_for_compare(spec: EnvSpec, raw: str | None) -> str | None:
    """Return a canonical string form of a raw env value for drift comparison.

    A missing or empty value normalizes to ``None``. A value that ``coerce``
    rejects is returned unchanged so the mismatch remains visible to the
    comparison. Otherwise booleans become ``"true"`` / ``"false"`` and every
    other type is rendered with ``str()`` of the coerced value.
    """
    if not raw:  # None or ""
        return None
    try:
        typed = coerce(spec, raw)
    except ValueError:
        # Invalid value: compare as-is so the drift surfaces.
        return raw
    else:
        if spec.type != "bool":
            return str(typed)
        return "true" if typed else "false"