The single choke-point for all binary file I/O (originals, derived artifacts, exports), replacing the scattered open()/shutil/Path.write_bytes calls across ~8 services. Backend chosen by STORAGE_BACKEND: - filesystem (default): disk under DATA_DIR — byte-for-byte legacy behaviour - dual: write disk + S3, read S3→disk fallback (migration window) - s3: MinIO via aioboto3 (lazy import; absent in the filesystem path) Keys are DATA_DIR-relative POSIX paths; the FS backend ignores the logical bucket and keeps the existing single tree, so the default backend is zero behaviour change. S3 maps a governance bucket (documents/immutable/derived) → MinIO bucket; presigned URLs are minted against the public endpoint (browser-reachable) and carry the Hebrew filename via RFC-5987 Content-Disposition. - config: STORAGE_BACKEND + MINIO_* (endpoint, public-endpoint, creds, region, 3 bucket names, presign TTL) - mcp_env_catalog: new "storage" category + 10 specs (X10/INV-ENV1) - pyproject: aioboto3>=13 (consumed here, deployed with first use) - tests: 18 unit tests (FS round-trip, key normalization/traversal guard, bucket resolution, backend selection, dual write-both + S3-down fallback) No call-sites are rewired yet — that is Phase 2 (106.3). STORAGE_BACKEND stays filesystem in prod, so behaviour is unchanged. Invariants: keeps G2 (one storage path replaces scattered I/O); establishes INV-STG1 (single layer), INV-STG2 (atomic keys, Hebrew name in metadata), INV-STG3 (governance buckets), INV-STG6 (presigned serving). Spec: docs/spec/X14-storage-minio.md. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
259 lines
10 KiB
Python
259 lines
10 KiB
Python
# web/mcp_env_catalog.py
|
|
"""Static catalog of MCP server env vars exposed in the settings UI.
|
|
|
|
Source of truth: Coolify env vars (read/write via Coolify API).
|
|
This file defines the whitelist + types + display metadata.
|
|
Keys not in this catalog are not displayed or editable.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, asdict
|
|
from typing import Any, Literal
|
|
|
|
# Value types a catalog entry may declare; `coerce` branches on these.
EnvType = Literal[
    "bool",
    "int",
    "float",
    "string",
]

# Grouping label attached to every catalog entry.
EnvCategory = Literal[
    "multimodal",
    "rerank",
    "halacha",
    "credentials",
    "connection",
    "storage",
    "general",
]
|
|
|
|
|
|
@dataclass(frozen=True)
class EnvSpec:
    """Immutable description of one whitelisted environment variable.

    The first six fields are required; the remaining ones are optional
    validation/display hints and default to ``None``.
    """

    # Environment variable name.
    key: str
    # Grouping label (one of the EnvCategory literals).
    category: EnvCategory
    # Declared value type (one of the EnvType literals).
    type: EnvType
    # Human-readable description text.
    description: str
    # Whether the value is a secret.
    is_secret: bool
    # Whether the value may be edited.
    is_editable: bool
    # Default value, if any.
    default: Any = None
    # Optional inclusive lower bound for numeric types.
    min: float | None = None
    # Optional inclusive upper bound for numeric types.
    max: float | None = None
    # Optional closed set of accepted string values.
    enum_values: tuple[str, ...] | None = None

    def to_public_dict(self) -> dict[str, Any]:
        """Return every spec field as a plain dict.

        Secret values are NOT masked here; masking is the caller's job.
        """
        public_fields = asdict(self)
        return public_fields
|
|
|
|
|
|
# Ordered list of every whitelisted env var. The public mapping below is
# derived from it, so each key is written exactly once.
_ENV_SPECS: tuple[EnvSpec, ...] = (
    # ── multimodal ─────────────────────────────────────────────────
    EnvSpec(
        "MULTIMODAL_ENABLED", "multimodal", "bool",
        "הפעלת page-image embeddings (voyage-multimodal-3)",
        is_secret=False, is_editable=True, default=False,
    ),
    EnvSpec(
        "MULTIMODAL_MODEL", "multimodal", "string",
        "מודל multimodal של Voyage",
        is_secret=False, is_editable=True, default="voyage-multimodal-3",
    ),
    EnvSpec(
        "MULTIMODAL_DPI", "multimodal", "int",
        "DPI ל-rendering של עמוד למודל",
        is_secret=False, is_editable=True, default=144, min=72, max=300,
    ),
    EnvSpec(
        "MULTIMODAL_THUMB_DPI", "multimodal", "int",
        "DPI ל-thumbnail בתצוגה",
        is_secret=False, is_editable=True, default=96, min=72, max=200,
    ),
    EnvSpec(
        "MULTIMODAL_TEXT_WEIGHT", "multimodal", "float",
        "משקל text vs image ב-RRF (0=image בלבד, 1=text בלבד)",
        is_secret=False, is_editable=True, default=0.5, min=0.0, max=1.0,
    ),
    EnvSpec(
        "MULTIMODAL_RRF_K", "multimodal", "int",
        "RRF damping constant",
        is_secret=False, is_editable=True, default=60, min=1, max=200,
    ),
    # ── rerank ─────────────────────────────────────────────────────
    EnvSpec(
        "VOYAGE_RERANK_ENABLED", "rerank", "bool",
        "הפעלת cross-encoder rerank",
        is_secret=False, is_editable=True, default=False,
    ),
    EnvSpec(
        "VOYAGE_RERANK_MODEL", "rerank", "string",
        "מודל rerank",
        is_secret=False, is_editable=True, default="rerank-2",
    ),
    EnvSpec(
        "VOYAGE_RERANK_FETCH_K", "rerank", "int",
        "מספר candidates לפני rerank",
        is_secret=False, is_editable=True, default=50, min=10, max=200,
    ),
    # ── halacha ────────────────────────────────────────────────────
    EnvSpec(
        "HALACHA_AUTO_APPROVE_THRESHOLD", "halacha", "float",
        "סף confidence ל-auto-approve של הלכות שחולצו",
        is_secret=False, is_editable=True, default=0.80, min=0.0, max=1.0,
    ),
    # ── storage (X14 / MinIO) ──────────────────────────────────────
    EnvSpec(
        "STORAGE_BACKEND", "storage", "string",
        "מנוע אחסון: filesystem (דיסק) / dual (דיסק+S3) / s3 (MinIO בלבד)",
        is_secret=False, is_editable=True, default="filesystem",
        enum_values=("filesystem", "dual", "s3"),
    ),
    EnvSpec(
        "MINIO_ENDPOINT", "storage", "string",
        "endpoint פנימי של MinIO (server-side, רשת Docker)",
        is_secret=False, is_editable=False, default="http://minio:9000",
    ),
    EnvSpec(
        "MINIO_PUBLIC_ENDPOINT", "storage", "string",
        "endpoint ציבורי ל-presigned URLs (גישת דפדפן)",
        is_secret=False, is_editable=False, default="https://s3.nautilus.marcusgroup.org",
    ),
    EnvSpec(
        "MINIO_ACCESS_KEY", "storage", "string",
        "MinIO access key (service-account מוגבל ל-3 הדליות)",
        is_secret=True, is_editable=False,
    ),
    EnvSpec(
        "MINIO_SECRET_KEY", "storage", "string",
        "MinIO secret key",
        is_secret=True, is_editable=False,
    ),
    EnvSpec(
        "MINIO_REGION", "storage", "string",
        "אזור S3 (MinIO מתעלם — לחתימת SigV4)",
        is_secret=False, is_editable=False, default="us-east-1",
    ),
    EnvSpec(
        "MINIO_BUCKET_DOCUMENTS", "storage", "string",
        "דלי מסמכי-מקור (versioning)",
        is_secret=False, is_editable=False, default="legal-documents",
    ),
    EnvSpec(
        "MINIO_BUCKET_IMMUTABLE", "storage", "string",
        "דלי החלטות סופיות (versioning + Object-Lock COMPLIANCE)",
        is_secret=False, is_editable=False, default="legal-immutable",
    ),
    EnvSpec(
        "MINIO_BUCKET_DERIVED", "storage", "string",
        "דלי נגזרים (thumbnails / extracted — ניתן-לשחזור)",
        is_secret=False, is_editable=False, default="legal-derived",
    ),
    EnvSpec(
        "MINIO_PRESIGN_TTL", "storage", "int",
        "תוקף presigned URL בשניות (מקס' SigV4 = 7 ימים)",
        is_secret=False, is_editable=True, default=900, min=60, max=604800,
    ),
    # ── general ────────────────────────────────────────────────────
    EnvSpec(
        "VOYAGE_MODEL", "general", "string",
        "מודל embedding ראשי",
        is_secret=False, is_editable=True, default="voyage-law-2",
    ),
    EnvSpec(
        "AUDIT_ENABLED", "general", "bool",
        "הפעלת audit log",
        is_secret=False, is_editable=True, default=True,
    ),
    # ── credentials (read-only, masked) ────────────────────────────
    EnvSpec(
        "VOYAGE_API_KEY", "credentials", "string",
        "Voyage AI API key",
        is_secret=True, is_editable=False,
    ),
    EnvSpec(
        "GOOGLE_CLOUD_VISION_API_KEY", "credentials", "string",
        "Google Cloud Vision API key (OCR)",
        is_secret=True, is_editable=False,
    ),
    EnvSpec(
        "INFISICAL_TOKEN", "credentials", "string",
        "Infisical SDK token",
        is_secret=True, is_editable=False,
    ),
    # ── connection (read-only — changing these at runtime is dangerous) ──
    EnvSpec(
        "POSTGRES_URL", "connection", "string",
        "PostgreSQL connection URL",
        is_secret=True, is_editable=False,
    ),
    # NOTE(review): Redis URLs can embed a password (redis://:pw@host);
    # this entry is non-secret while POSTGRES_URL is secret — confirm intended.
    EnvSpec(
        "REDIS_URL", "connection", "string",
        "Redis connection URL",
        is_secret=False, is_editable=False,
    ),
    EnvSpec(
        "DATA_DIR", "connection", "string",
        "Data directory path",
        is_secret=False, is_editable=False,
    ),
)

# Whitelist keyed by env var name; insertion order follows _ENV_SPECS.
ENV_CATALOG: dict[str, EnvSpec] = {spec.key: spec for spec in _ENV_SPECS}
|
|
|
|
|
|
# ── helpers ────────────────────────────────────────────────────────
|
|
|
|
|
|
def mask_secret(value: str | None) -> str:
    """Return a display-safe form of a secret.

    ``None`` becomes the empty string. Any other value becomes ``****``
    followed by its last four characters; values of four characters or
    fewer (including the empty string) reveal nothing and become ``****``.
    """
    if value is None:
        return ""
    # Only expose a tail when something still stays hidden in front of it.
    visible_tail = value[-4:] if len(value) > 4 else ""
    return "****" + visible_tail
|
|
|
|
|
|
def _check_bounds(spec: EnvSpec, value: float) -> None:
    """Raise ValueError when *value* falls outside spec.min / spec.max (inclusive)."""
    if spec.min is not None and value < spec.min:
        raise ValueError(f"ערך {value} מתחת למינימום {spec.min}")
    if spec.max is not None and value > spec.max:
        raise ValueError(f"ערך {value} מעל המקסימום {spec.max}")


def coerce(spec: EnvSpec, raw: Any) -> Any:
    """Coerce raw input (typically a str from JSON) to the spec's type.

    Args:
        spec: Catalog entry; its ``type``, ``min``, ``max`` and
            ``enum_values`` drive conversion and validation.
        raw: Value to convert. ``None`` and ``""`` are rejected as empty.

    Returns:
        A ``bool``, ``int``, ``float`` or ``str`` according to ``spec.type``.

    Raises:
        ValueError: If the input is empty, cannot be parsed as the declared
            type, is NaN, lies outside ``min``/``max``, or is not one of
            ``enum_values``.
    """
    import math  # local import: keeps this block self-contained

    if raw is None or raw == "":
        raise ValueError("ערך ריק")

    if spec.type == "bool":
        if isinstance(raw, bool):
            return raw
        s = str(raw).strip().lower()
        if s in ("true", "1", "yes", "on"):
            return True
        if s in ("false", "0", "no", "off"):
            return False
        raise ValueError(f"ערך bool לא חוקי: {raw}")

    if spec.type == "int":
        # int() would silently truncate 1.5 -> 1; refuse fractional floats.
        # (inf/nan also land here, since is_integer() is False for them.)
        if isinstance(raw, float) and not raw.is_integer():
            raise ValueError(f"ערך int לא חוקי (שבר עשרוני): {raw}")
        try:
            v = int(raw)
        except (TypeError, ValueError):
            raise ValueError(f"ערך int לא חוקי: {raw}") from None
        _check_bounds(spec, v)
        return v

    if spec.type == "float":
        try:
            v = float(raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an int too large to be represented as a float.
            raise ValueError(f"ערך float לא חוקי: {raw}") from None
        # NaN compares False against every bound, so without this check it
        # would pass the range validation below and be returned as valid.
        if math.isnan(v):
            raise ValueError(f"ערך float לא חוקי: {raw}")
        _check_bounds(spec, v)
        return v

    # string
    s = str(raw)
    if spec.enum_values and s not in spec.enum_values:
        raise ValueError(f"ערך לא ברשימה: {spec.enum_values}")
    return s
|
|
|
|
|
|
def normalize_for_compare(spec: EnvSpec, raw: str | None) -> str | None:
    """Reduce a raw env string to a canonical form for drift comparison.

    Returns ``None`` for a missing/empty value, ``"true"``/``"false"`` for
    bool specs, and ``str()`` of the coerced value for everything else.
    A value that fails validation is returned unchanged so the mismatch
    still shows up as drift.
    """
    if not raw:  # None or ""
        return None

    try:
        typed = coerce(spec, raw)
    except ValueError:
        # Invalid value: compare it as-is.
        return raw

    if spec.type != "bool":
        return str(typed)
    return "true" if typed else "false"
|