ליבת-המשיכה למרשם-התכניות (V38): מספר-תכנית → זהות+תוקף מ-mavat דרך גשר-Camoufox הקיים (G2 — אותו שירות/פורט/סוד כמו X13, בלי חדשים). - court_fetch_service/mavat_client.py (חדש): דרייבר Camoufox מול mavat — עוקף F5-ASM (דפדפן-JS), search→auto-nav ל-SV4, לוכד GET /rest/api/SV4/1, מפענח planDetails (E_NAME/AUTH/ENTITY_SUBTYPE/GOALS) + rsInternet (פרסום-לאישור→ED_PUBLICATION_FILE=י"פ + DETAILS→תאריך/עמוד). מלכודת- דרייבר: init-script window.onerror swallow. reCAPTCHA נשאר דלוק (token). - court_fetch_service/server.py: POST /plan-fetch (אותו Bearer). - services/plans_fetch.py (חדש): צד-קונטיינר — httpx לגשר, מנרמל שדות. - tools/plans.py + server.py: כלי-MCP plan_fetch (מועמד, לא כותב). - web/app.py: POST /api/plans/fetch (503 גשר-למטה, 404 לא-נמצא). אומת חי מול mavat: 101-1031020→י"פ 13697 (עמ' 8758, 30/07/2025), 101-1053933→י"פ 13836. מקור-אמת עשיר מתב"ע-עכשיו (שחסר י"פ). INV-AH: כל ערך נושא source_url; שדה-חסר ריק לא מומצא. G10: מחזיר מועמד בלבד — שער-יו"ר (review_status) נשמר. G2: מרחיב גשר+מרשם קיימים. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
480 lines
20 KiB
Python
480 lines
20 KiB
Python
"""Host-side HTTP bridge for Tier-1 verdict fetching (X13).
|
|
|
|
Mirrors ``legal_mcp.chat_service.server`` — the proven host-side pattern: an
|
|
aiohttp app, bound to the docker bridge gateway, Bearer-auth, that does the one
|
|
thing the container can't (here: drive a real browser against נט המשפט).
|
|
|
|
Endpoints:
|
|
POST /fetch body {file_number, month, year, case_number, court}
|
|
→ {ok, content_b64, filename, source_url, court, reason}
|
|
REQUIRES Authorization: Bearer <COURT_FETCH_SHARED_SECRET>.
|
|
GET /health liveness (no auth); reports camofox + VNC URL if available.
|
|
GET /pm2 read-only pm2 status of legal-* / paperclip services (no auth).
|
|
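  POST /pm2/control body {name, action: restart|stop|start} → run pm2 on a
                    whitelisted legal-* process. REQUIRES Bearer (mutating).
  GET  /usage       cached subscription-usage payload (no auth).
  POST /plan-fetch  body {plan_number} → {ok, plan, reason}.
                    REQUIRES Bearer.
  POST /adapter-migration  body {action, agent, to, model, relax_tools}
                    → {ok, exit_code, stdout, stderr}. REQUIRES Bearer (mutating).
  POST /run-script  body {name} → {ok, exit_code, stdout, stderr} for an
                    allowlisted script. REQUIRES Bearer.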
|
|
|
|
Run with pm2:
|
|
pm2 start scripts/legal-court-fetch-service.config.cjs
|
|
|
|
Security posture (identical rationale to legal-chat-service):
|
|
1. Bind defaults to ``10.0.1.1`` (docker0 bridge gateway) — reachable from
|
|
the host + containers on docker bridges, invisible to outside networks.
|
|
  2. Every POST endpoint (``/fetch``, ``/plan-fetch``, ``/pm2/control``,
     ``/adapter-migration``, ``/run-script``) requires a Bearer token
     (constant-time compare); the service refuses to start without
     ``COURT_FETCH_SHARED_SECRET`` set.
|
|
3. ``/health`` is unauthenticated and spawns nothing.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import base64
|
|
import hmac
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
import aiohttp
|
|
from aiohttp import web
|
|
|
|
_pkg_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
|
if _pkg_root not in sys.path:
|
|
sys.path.insert(0, _pkg_root)
|
|
|
|
from legal_mcp.court_fetch_service import camofox_client # noqa: E402
|
|
from legal_mcp.court_fetch_service import mavat_client # noqa: E402
|
|
from legal_mcp.services import usage_limits # noqa: E402
|
|
from legal_mcp.services import script_runner # noqa: E402
|
|
|
|
logger = logging.getLogger("legal_court_fetch_service")
|
|
|
|
_SHARED_SECRET: str = ""
|
|
|
|
|
|
async def health(request: web.Request) -> web.Response:
    """Liveness probe (unauthenticated).

    Always answers with a JSON object carrying ``ok``, ``service`` and
    ``camofox_enabled``. When the Camoufox client reports itself enabled, the
    result of ``camofox_client.health()`` is attached under ``camofox``; if that
    call raises, the exception text is attached under ``camofox_error`` instead
    so the probe itself never fails.
    """
    payload = {
        "ok": True,
        "service": "legal-court-fetch-service",
        "camofox_enabled": camofox_client.is_enabled(),
    }
    if not camofox_client.is_enabled():
        return web.json_response(payload)

    try:
        payload["camofox"] = await camofox_client.health()
    except Exception as exc:  # the probe must answer even if the browser side is broken
        payload["camofox_error"] = str(exc)
    return web.json_response(payload)
|
|
|
|
|
|
# Background services we surface on the /operations dashboard. pm2 jlist is a
|
|
# host-only capability (the legal-ai container can't run pm2), so the container's
|
|
# FastAPI proxies this read-only endpoint over the docker bridge. No secret:
|
|
# pm2 status (names/cpu/mem) carries nothing sensitive and the bind (10.0.1.1)
|
|
# is already host/container-only.
|
|
_PM2_PREFIXES = ("legal-", "paperclip")
|
|
|
|
|
|
def _trim_service(a: dict) -> dict:
|
|
"""Project a pm2 jlist app entry into the fields the dashboard needs."""
|
|
env = a.get("pm2_env", {}) or {}
|
|
return {
|
|
"name": a.get("name", ""),
|
|
"status": env.get("status", ""),
|
|
"restarts": env.get("restart_time", 0),
|
|
"uptime_ms": env.get("pm_uptime", 0),
|
|
"cpu": (a.get("monit") or {}).get("cpu", 0),
|
|
"memory_bytes": (a.get("monit") or {}).get("memory", 0),
|
|
"cron": env.get("cron_restart") or "",
|
|
"autorestart": env.get("autorestart", True),
|
|
}
|
|
|
|
|
|
async def _pm2_run(*args: str, timeout: float = 10) -> tuple[int, bytes, bytes]:
    """Run a pm2 subcommand; returns (returncode, stdout, stderr).

    ``args`` are passed to ``pm2`` as separate argv entries (no shell).

    Raises:
        FileNotFoundError: ``pm2`` is not on PATH.
        asyncio.TimeoutError: the command did not finish within ``timeout``
            seconds. The child is killed and reaped before the error
            propagates, so a hung pm2 call does not leave a stray process.
    """
    import asyncio as _asyncio

    proc = await _asyncio.create_subprocess_exec(
        "pm2", *args,
        stdout=_asyncio.subprocess.PIPE, stderr=_asyncio.subprocess.PIPE,
    )
    try:
        out, err = await _asyncio.wait_for(proc.communicate(), timeout=timeout)
    except _asyncio.TimeoutError:
        # wait_for only cancels communicate(); the child itself keeps running
        # unless we terminate it explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # it exited between the timeout and the kill
        await proc.wait()
        raise
    return proc.returncode or 0, out, err
|
|
|
|
|
|
# /operations polls every 5s; the usage endpoint 429s if hit that often (it's
# meant for a status bar, not a poll loop). Cache the last good payload and only
# re-fetch when older than this — Anthropic sees ~1 req/min regardless of how
# many dashboards poll. The 5-hour window moves slowly, so 60s is plenty fresh.
_USAGE_TTL_SEC = 60.0
# ts / data:  monotonic time and payload of the last successful fetch.
# failed_ts:  monotonic time of the last failed fetch, or None when there has
#             been no failure since the last success. Used to back off so a
#             failing upstream is not re-hit on every dashboard poll.
_usage_cache: dict = {"ts": 0.0, "data": None, "failed_ts": None}


async def usage_status(request: web.Request) -> web.Response:
    """Proxy the claude.ai subscription usage % (host-only — needs the local
    OAuth token), cached for ``_USAGE_TTL_SEC``.

    On a fetch failure (e.g. the endpoint's own 429) the last good payload is
    served if there is one, so a transient limit doesn't blank the dashboard;
    with nothing cached the answer is ``{"error": "usage unavailable"}`` with
    HTTP 502. After a failure the upstream is not retried for another
    ``_USAGE_TTL_SEC`` seconds, so the polling dashboards cannot keep a
    rate-limited endpoint rate-limited.

    The raw read is the shared reader
    ``legal_mcp.services.usage_limits.subscription_usage`` (single source of
    truth for endpoint/creds/UA). It is a blocking call, so it runs in the
    default executor to keep the aiohttp event loop responsive.
    """
    import asyncio as _asyncio

    now = time.monotonic()
    cached = _usage_cache["data"]
    if cached is not None and (now - _usage_cache["ts"]) < _USAGE_TTL_SEC:
        return web.json_response(cached)

    # Back off after a recent failure instead of re-fetching on every poll.
    failed_ts = _usage_cache.get("failed_ts")
    backing_off = failed_ts is not None and (now - failed_ts) < _USAGE_TTL_SEC

    data = None
    if not backing_off:
        # subscription_usage returns None on ANY failure (creds missing /
        # endpoint 429 / network) — it never throws.
        data = await _asyncio.get_running_loop().run_in_executor(
            None, usage_limits.subscription_usage)

    if data is None:
        if not backing_off:
            _usage_cache["failed_ts"] = now
        # Re-read: a concurrent request may have filled the cache meanwhile.
        stale = _usage_cache["data"]
        if stale is not None:
            return web.json_response(stale)
        return web.json_response({"error": "usage unavailable"}, status=502)

    _usage_cache["ts"] = now
    _usage_cache["data"] = data
    _usage_cache["failed_ts"] = None
    return web.json_response(data)
|
|
|
|
|
|
async def pm2_status(request: web.Request) -> web.Response:
    """Serve a trimmed ``pm2 jlist`` restricted to the dashboard's services.

    Only apps whose name starts with one of ``_PM2_PREFIXES`` are included,
    each projected through ``_trim_service`` and sorted by name. Any failure to
    run or parse ``pm2 jlist`` is reported as ``{"error": ...}`` with HTTP 502.
    """
    try:
        code, stdout, stderr = await _pm2_run("jlist")
        if code != 0:
            detail = stderr.decode("utf-8", "replace")[:200]
            return web.json_response(
                {"error": f"pm2 jlist failed: {detail}"},
                status=502,
            )
        listing = json.loads(stdout.decode("utf-8", "replace"))
    except FileNotFoundError:
        return web.json_response({"error": "pm2 not found on PATH"}, status=502)
    except Exception as exc:  # report, never raise out of the handler
        return web.json_response({"error": f"pm2 error: {exc}"}, status=502)

    selected = []
    for entry in listing:
        # str.startswith accepts the whole tuple of prefixes at once.
        if str(entry.get("name", "")).startswith(_PM2_PREFIXES):
            selected.append(_trim_service(entry))
    ordered = sorted(selected, key=lambda svc: svc["name"])
    return web.json_response({"services": ordered})
|
|
|
|
|
|
# Process control (restart/stop/start) for the dashboard's "Windows-services"
# panel. Mutating, so it requires the Bearer secret (unlike read-only /pm2).
# Whitelisted to ``legal-`` names only — never paperclip or arbitrary processes.
_PM2_ACTIONS = {"restart", "stop", "start"}

# Our own pm2 process name. Restarting/stopping ourselves kills this process
# mid-reply, so those self-actions are detached (see pm2_control).
_OWN_PM2_NAME = os.environ.get("COURT_FETCH_SERVICE_PM2_NAME", "legal-court-fetch-service")


async def pm2_control(request: web.Request) -> web.Response:
    """Run ``pm2 <action> <name>`` for a whitelisted legal-* process.

    Body: ``{"name": str, "action": "restart" | "stop" | "start"}``.

    Responses:
        401 — missing/invalid Bearer token.
        400 — body is not a JSON object, or ``action`` is not allowed.
        403 — ``name`` does not start with ``legal-``.
        502 — pm2 is missing, failed, or timed out.
        200 — ``{"ok": True, "action": ..., "service": ...}`` where ``service``
              is the process's trimmed status after the action, or ``None``
              when it could not be re-read. A self restart/stop additionally
              carries ``"deferred": True``.
    """
    unauth = _check_bearer(request)
    if unauth is not None:
        return unauth
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        return web.json_response({"error": "invalid JSON body"}, status=400)
    if not isinstance(body, dict):
        # Valid JSON that is not an object (list, string, null, ...) has no
        # .get(); reject it instead of failing with a 500.
        return web.json_response({"error": "JSON body must be an object"}, status=400)

    name = str(body.get("name", "")).strip()
    action = str(body.get("action", "")).strip()
    if action not in _PM2_ACTIONS:
        return web.json_response(
            {"error": f"action must be one of {sorted(_PM2_ACTIONS)}"}, status=400
        )
    if not name.startswith("legal-"):
        return web.json_response(
            {"error": "name must be a legal-* process"}, status=403
        )

    # Self restart/stop kills this process before it can reply (client sees a
    # dropped connection / 502) even though pm2 does perform the action. Detach
    # it with a brief delay so the HTTP response flushes first, then report
    # success optimistically.
    if name == _OWN_PM2_NAME and action in ("restart", "stop"):
        import asyncio as _asyncio
        import shlex as _shlex

        # ``action`` is allowlisted and ``name`` equals our own configured
        # name here; quoting is defence in depth for the shell string.
        await _asyncio.create_subprocess_shell(
            f"sleep 1; pm2 {action} {_shlex.quote(name)} --silent"
        )
        return web.json_response(
            {"ok": True, "action": action, "deferred": True, "service": None}
        )

    try:
        rc, _out, err = await _pm2_run(action, name, "--silent", timeout=30)
    except FileNotFoundError:
        return web.json_response({"error": "pm2 not found on PATH"}, status=502)
    except Exception as e:  # never throw
        return web.json_response({"ok": False, "error": f"pm2 error: {e}"}, status=502)

    if rc != 0:
        return web.json_response(
            {"ok": False,
             "error": f"pm2 {action} {name} failed: "
                      f"{err.decode('utf-8','replace')[:200]}"},
            status=502,
        )

    # Re-read just this process so the UI settles on the real new state. This
    # is best-effort: the action above already succeeded, so a failure here
    # must not turn the response into an error.
    svc = None
    try:
        rc2, out2, _ = await _pm2_run("jlist")
        if rc2 == 0:
            for a in json.loads(out2.decode("utf-8", "replace")):
                if a.get("name") == name:
                    svc = _trim_service(a)
                    break
    except Exception:
        logger.warning(
            "pm2 %s %s succeeded but the status re-read failed",
            action, name, exc_info=True,
        )
    return web.json_response({"ok": True, "action": action, "service": svc})
|
|
|
|
|
|
def _check_bearer(request: web.Request) -> web.Response | None:
    """Validate the ``Authorization: Bearer <secret>`` header.

    Returns ``None`` when the header matches ``_SHARED_SECRET``; otherwise a
    ready-to-send 401 JSON response. The comparison is constant-time.
    """
    auth = request.headers.get("Authorization", "")
    authorized = False
    # Fail closed when no secret is configured (e.g. the app was built without
    # going through main()): "Bearer " must never authenticate.
    if _SHARED_SECRET and auth:
        expected = "Bearer " + _SHARED_SECRET
        # compare_digest() on str raises TypeError for non-ASCII input, which
        # would turn a malformed header into a 500. Compare bytes instead;
        # "surrogatepass" makes the encode total for any header string.
        authorized = hmac.compare_digest(
            auth.encode("utf-8", "surrogatepass"),
            expected.encode("utf-8", "surrogatepass"),
        )
    if not authorized:
        return web.json_response(
            {"error": "unauthorized: missing or invalid Bearer token"}, status=401
        )
    return None
|
|
|
|
|
|
async def fetch(request: web.Request) -> web.Response:
    """Fetch a verdict through the Camoufox driver and return it base64-encoded.

    Body: ``{file_number, month, year, case_number?, court?}`` — the first
    three are required and must be non-empty.

    Responses:
        401 — missing/invalid Bearer token.
        400 — body is not a JSON object, or a required field is missing.
        200 — on success ``{ok: True, content_b64, filename, source_url,
              court}``; on any fetch failure ``{ok: False, reason}``. Failures
              are deliberately reported at HTTP 200 so the caller handles them
              as a result rather than a transport error.
    """
    unauth = _check_bearer(request)
    if unauth is not None:
        return unauth
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        return web.json_response({"error": "invalid JSON body"}, status=400)
    if not isinstance(body, dict):
        # Valid JSON that is not an object has no .get(); reject it instead of
        # failing with a 500.
        return web.json_response({"error": "JSON body must be an object"}, status=400)

    required = ("file_number", "month", "year")
    if not all(body.get(k) for k in required):
        return web.json_response(
            {"ok": False, "reason": f"missing one of {required}"}, status=400
        )

    try:
        result = await camofox_client.fetch_admin_verdict(
            file_number=str(body["file_number"]),
            month=str(body["month"]),
            year=str(body["year"]),
            case_number=str(body.get("case_number", "")),
            court=str(body.get("court", "")),
        )
        return web.json_response({
            "ok": True,
            "content_b64": base64.b64encode(result["content"]).decode("ascii"),
            "filename": result.get("filename", ""),
            "source_url": result.get("source_url", ""),
            "court": result.get("court", ""),
        })
    except (camofox_client.CamofoxUnavailable, camofox_client.NgcsFlowError) as e:
        # Expected, recoverable failure → orchestrator escalates (INV-CF3).
        return web.json_response({"ok": False, "reason": str(e)}, status=200)
    except Exception as e:  # noqa: BLE001
        logger.exception("fetch failed")
        return web.json_response({"ok": False, "reason": f"unexpected: {e}"}, status=200)
|
|
|
|
|
|
async def plan_fetch(request: web.Request) -> web.Response:
    """Fetch one plan's (תב"ע) identity + validity from mavat (מנהל התכנון).

    Body ``{plan_number}`` → ``{ok, plan: {...}, reason}``. Same Bearer + bind as
    /fetch. The browser work (Camoufox over Xvfb past F5 ASM) lives in
    ``mavat_client``; expected failures (not found / blocked) come back ok=false
    at HTTP 200 so the caller renders a reason rather than treating it as a 5xx.

    Responses:
        401 — missing/invalid Bearer token.
        400 — body is not a JSON object, or ``plan_number`` is empty.
        200 — ``{ok: True, plan}`` or ``{ok: False, reason}``.
    """
    unauth = _check_bearer(request)
    if unauth is not None:
        return unauth
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        return web.json_response({"error": "invalid JSON body"}, status=400)
    if not isinstance(body, dict):
        # Valid JSON that is not an object has no .get(); reject it instead of
        # failing with a 500.
        return web.json_response({"error": "JSON body must be an object"}, status=400)

    plan_number = str(body.get("plan_number", "")).strip()
    if not plan_number:
        return web.json_response({"ok": False, "reason": "missing plan_number"}, status=400)

    try:
        plan = await mavat_client.fetch_plan(plan_number)
        return web.json_response({"ok": True, "plan": plan})
    except (mavat_client.MavatUnavailable, mavat_client.MavatFlowError) as e:
        # Expected, recoverable (browser unavailable / plan not found / blocked).
        return web.json_response({"ok": False, "reason": str(e)}, status=200)
    except Exception as e:  # noqa: BLE001
        logger.exception("plan_fetch failed")
        return web.json_response({"ok": False, "reason": f"unexpected: {e}"}, status=200)
|
|
|
|
|
|
# ─── adapter-migration: host-side runner for scripts/migrate_agent_adapter.py ───
# The legal-ai container can't perform the migration itself (it needs the host
# filesystem — generated instruction copies, the gemini settings file — plus the
# embedded board DB), so the dashboard proxies the action here. Mutating, so it
# requires the Bearer secret like /pm2/control. We launch exactly one fixed,
# in-repo script with create_subprocess_exec (no shell) and an action allowlist;
# every other argument is passed through opaque and validated by the script
# itself. Kept deliberately symbol-light so this host bridge stays generic.
_MIGRATE_SCRIPT = "/home/chaim/legal-ai/scripts/migrate_agent_adapter.py"
_MIGRATE_PYTHON = "/home/chaim/legal-ai/mcp-server/.venv/bin/python"
_MIGRATE_ACTIONS = {"check", "apply", "revert", "verify"}


async def adapter_migration(request: web.Request) -> web.Response:
    """Run scripts/migrate_agent_adapter.py on the host and relay its result.

    Body: ``{action, agent?, to?, model?, relax_tools?}``. ``action`` must be
    one of ``_MIGRATE_ACTIONS``; ``agent`` is required for check/apply/revert
    and ``to`` for check/apply. ``model`` and ``relax_tools`` are only
    forwarded for check/apply.

    Responses:
        401 — missing/invalid Bearer token.
        400 — body is not a JSON object, or a required field is missing/invalid.
        504 — the script did not finish within 180 seconds.
        502 — the script could not be launched.
        200 — ``{ok, exit_code, stdout, stderr}``; ``ok`` mirrors exit code 0.
    """
    unauth = _check_bearer(request)
    if unauth is not None:
        return unauth
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        return web.json_response({"error": "invalid JSON body"}, status=400)
    if not isinstance(body, dict):
        # Valid JSON that is not an object has no .get(); reject it instead of
        # failing with a 500.
        return web.json_response({"error": "JSON body must be an object"}, status=400)

    action = str(body.get("action", "")).strip()
    if action not in _MIGRATE_ACTIONS:
        return web.json_response(
            {"error": f"action must be one of {sorted(_MIGRATE_ACTIONS)}"}, status=400
        )

    argv = [_MIGRATE_PYTHON, _MIGRATE_SCRIPT, f"--{action}"]
    agent = str(body.get("agent", "")).strip()
    target = str(body.get("to", "")).strip()
    model = str(body.get("model", "")).strip()
    if action in ("check", "apply", "revert"):
        if not agent:
            return web.json_response({"error": "agent required"}, status=400)
        argv += ["--agent", agent]
    if action in ("check", "apply"):
        if not target:
            return web.json_response({"error": "to (target) required"}, status=400)
        argv += ["--to", target]
        if model:
            argv += ["--model", model]
        if body.get("relax_tools"):
            argv += ["--relax-tools"]

    import asyncio as _asyncio

    env = {**os.environ, "HOME": "/home/chaim"}
    try:
        proc = await _asyncio.create_subprocess_exec(
            *argv, cwd="/home/chaim/legal-ai", env=env,
            stdout=_asyncio.subprocess.PIPE, stderr=_asyncio.subprocess.PIPE,
        )
        out, err = await _asyncio.wait_for(proc.communicate(), timeout=180)
    except _asyncio.TimeoutError:
        # NOTE(review): the child is NOT killed here, so a slow migration keeps
        # running after the caller has been told it timed out. Left as-is on
        # purpose — killing a mutating --apply half-way may be worse; confirm
        # the intended behaviour.
        return web.json_response({"ok": False, "error": "migration timed out"}, status=504)
    except Exception as e:  # never throw — relay the failure
        return web.json_response({"ok": False, "error": f"launch failed: {e}"}, status=502)

    # 200 regardless of exit code: a non-zero --check (preflight refusal) is an
    # informative result the caller renders, not a transport error.
    return web.json_response({
        "ok": (proc.returncode == 0),
        "exit_code": proc.returncode,
        "stdout": out.decode("utf-8", "replace"),
        "stderr": err.decode("utf-8", "replace"),
    })
|
|
|
|
|
|
# ─── run-script: host-side runner for read-only/audit scripts (#4) ─────────────
# Same shape as /adapter-migration but for the SCRIPT_RUN_ALLOWLIST — a fixed set
# of read-only scripts each with a hard-coded safe argv. The request body's only
# meaningful field is ``name``; arguments are NEVER taken from the caller (so no
# --apply/--force injection). Allowlist enforcement lives here, on the host.
async def run_script(request: web.Request) -> web.Response:
    """Run an allowlisted read-only script on the host and relay its result.

    Body: ``{"name": str}``. The argv comes entirely from
    ``script_runner.build_argv(name)``; a ``None`` result means the script is
    not runnable.

    Responses:
        401 — missing/invalid Bearer token.
        400 — body is not a JSON object.
        403 — ``name`` is not in the allowlist.
        504 — the script did not finish within 600 seconds (it is killed).
        502 — the script could not be launched.
        200 — ``{ok, exit_code, stdout, stderr}``; ``ok`` mirrors exit code 0.
    """
    unauth = _check_bearer(request)
    if unauth is not None:
        return unauth
    try:
        body = await request.json()
    except (json.JSONDecodeError, UnicodeDecodeError):
        return web.json_response({"error": "invalid JSON body"}, status=400)
    if not isinstance(body, dict):
        # Valid JSON that is not an object has no .get(); reject it instead of
        # failing with a 500.
        return web.json_response({"error": "JSON body must be an object"}, status=400)

    name = str(body.get("name", "")).strip()
    argv = script_runner.build_argv(name)
    if argv is None:
        return web.json_response(
            {"ok": False, "error": f"script not runnable (not in allowlist): {name!r}"},
            status=403,
        )

    import asyncio as _asyncio

    env = {**os.environ, "HOME": "/home/chaim"}
    proc = None
    try:
        proc = await _asyncio.create_subprocess_exec(
            *argv, cwd="/home/chaim/legal-ai", env=env,
            stdout=_asyncio.subprocess.PIPE, stderr=_asyncio.subprocess.PIPE,
        )
        out, err = await _asyncio.wait_for(proc.communicate(), timeout=600)
    except _asyncio.TimeoutError:
        # wait_for only cancels communicate(); kill and reap the child so a
        # hung script does not keep running after we report the timeout.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # it exited between the timeout and the kill
            await proc.wait()
        return web.json_response({"ok": False, "error": "script timed out"}, status=504)
    except Exception as e:  # never throw — relay the failure
        return web.json_response({"ok": False, "error": f"launch failed: {e}"}, status=502)

    # Best-effort audit trail — one line per run. A logging failure must not
    # fail the request, but it should not vanish silently either.
    try:
        os.makedirs("/home/chaim/legal-ai/data/logs", exist_ok=True)
        stamp = time.strftime("%Y-%m-%dT%H:%M:%S%z")
        with open("/home/chaim/legal-ai/data/logs/script-runs.log", "a",
                  encoding="utf-8") as fh:
            fh.write(f"{stamp}\t{name}\texit={proc.returncode}\n")
    except Exception:
        logger.warning("could not append to the script-runs audit log", exc_info=True)

    # 200 regardless of exit code — a non-zero audit result is informative output
    # the caller renders, not a transport error.
    return web.json_response({
        "ok": (proc.returncode == 0),
        "exit_code": proc.returncode,
        "stdout": out.decode("utf-8", "replace"),
        "stderr": err.decode("utf-8", "replace"),
    })
|
|
|
|
|
|
def build_app() -> web.Application:
    """Create the aiohttp application and register every route.

    Request bodies are capped at 64 MiB. GET routes are registered first, then
    POST routes, in the order listed below.
    """
    application = web.Application(client_max_size=64 * 1024 * 1024)
    router = application.router

    get_routes = (
        ("/health", health),
        ("/pm2", pm2_status),
        ("/usage", usage_status),
    )
    post_routes = (
        ("/pm2/control", pm2_control),
        ("/fetch", fetch),
        ("/plan-fetch", plan_fetch),
        ("/adapter-migration", adapter_migration),
        ("/run-script", run_script),
    )
    for path, handler in get_routes:
        router.add_get(path, handler)
    for path, handler in post_routes:
        router.add_post(path, handler)
    return application
|
|
|
|
|
|
def main() -> int:
    """Parse CLI options, validate the shared secret and serve until stopped.

    Options: ``--port`` (default 8771), ``--host`` (default ``10.0.1.1``, the
    docker0 bridge gateway) and ``--log-level`` (default ``INFO``).

    Returns:
        2 when ``COURT_FETCH_SHARED_SECRET`` is unset or shorter than 24
        characters (the service refuses to start); 0 after the server exits.
    """
    global _SHARED_SECRET

    parser = argparse.ArgumentParser(description="legal-court-fetch-service")
    parser.add_argument("--port", type=int, default=8771)
    parser.add_argument("--host", default="10.0.1.1",
                        help="bind address; default = docker0 bridge gateway")
    parser.add_argument("--log-level", default="INFO")
    args = parser.parse_args()

    logging.basicConfig(level=args.log_level.upper(),
                        format="%(asctime)s %(name)s %(levelname)s %(message)s")

    secret = os.environ.get("COURT_FETCH_SHARED_SECRET", "").strip()
    if not secret:
        logger.error(
            "COURT_FETCH_SHARED_SECRET is empty; refusing to start. Set it in "
            "/home/chaim/.legal-court-fetch-service.env (loaded by pm2) and "
            "mirror it as a Coolify env var on the legal-ai app."
        )
        return 2
    if len(secret) < 24:
        # The message used to claim ">=32 chars expected" while the check
        # enforces 24; state the enforced minimum and keep 32 as advice so
        # existing 24–31 character secrets keep working.
        logger.error(
            "COURT_FETCH_SHARED_SECRET too short "
            "(minimum 24 chars; >=32 recommended)."
        )
        return 2
    _SHARED_SECRET = secret

    app = build_app()
    logger.info("legal-court-fetch-service listening on %s:%d", args.host, args.port)
    # Suppress aiohttp's own startup banner; the line above is our log entry.
    web.run_app(app, host=args.host, port=args.port, print=lambda _m: None)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|