feat(X13): auto-fetch court verdicts from נט המשפט → corpus (Tier 0 + scaffold)
תת-מערכת אחזור-פסיקה אוטומטי: כשיומון מצביע על פס"ד בית-משפט, מסווגים את הערכאה, מורידים מהמקור הציבורי המתאים, וקולטים דרך צינור-הקליטה הקנוני. - spec-first: docs/spec/X13-court-fetch.md (INV-CF1..CF7) + אינדקס - מסווג court_citation.py (supreme/admin/skip) + 10 בדיקות (עת"מ 46111-12-22 → admin) - Tier 0: court_fetch_supreme.py — supremedecisions API (reverse-engineered), httpx + browser-headers (אומת 200) + politeness - תור court_fetch_jobs (SCHEMA_V30) + DB helpers + court_fetch_orchestrator.py - Tier 1 scaffold: legal-court-fetch-service (aiohttp+Bearer, מראת legal-chat-service) + camofox_client (Camoufox open-source) + recaptcha_audio (Whisper מקומי) + pm2 - Tier 2 fallback חינני: manual + missing_precedent (INV-CF2/CF3 — אין drop שקט) - כלי-MCP court_verdict_fetch / court_fetch_status; SCRIPTS.md Invariants: מקיים G2 (מסלול-קליטה יחיד, INV-CF1) · G3/G1 (idempotent+נרמול, INV-CF5) · G4/§6 (אין בליעה שקטה, INV-CF2) · G10 (שער-אנושי, INV-CF3) · G5 (source_type, INV-CF6) · G9 (provenance+audit, INV-CF7). מקורות INV-CF4: RFC 9309 · Google crawler · OWASP OAT. Follow-ups (טרם אומתו חי): live Tier-0 validation · התקנת camofox-browser+whisper · כיול selectors Tier-1 · COURT_FETCH_SHARED_SECRET (Infisical+Coolify) · טריגר מ-digest try_autolink (worktree-digests-radar). V30 עלול להתנגש עם digests-radar. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
145
mcp-server/src/legal_mcp/court_fetch_service/server.py
Normal file
145
mcp-server/src/legal_mcp/court_fetch_service/server.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Host-side HTTP bridge for Tier-1 verdict fetching (X13).
|
||||
|
||||
Mirrors ``legal_mcp.chat_service.server`` — the proven host-side pattern: an
|
||||
aiohttp app, bound to the docker bridge gateway, Bearer-auth, that does the one
|
||||
thing the container can't (here: drive a real browser against נט המשפט).
|
||||
|
||||
Endpoints:
|
||||
POST /fetch body {file_number, month, year, case_number, court}
|
||||
→ {ok, content_b64, filename, source_url, court, reason}
|
||||
REQUIRES Authorization: Bearer <COURT_FETCH_SHARED_SECRET>.
|
||||
GET /health liveness (no auth); reports camofox + VNC URL if available.
|
||||
|
||||
Run with pm2:
|
||||
pm2 start scripts/legal-court-fetch-service.config.cjs
|
||||
|
||||
Security posture (identical rationale to legal-chat-service):
|
||||
1. Bind defaults to ``10.0.1.1`` (docker0 bridge gateway) — reachable from
|
||||
the host + containers on docker bridges, invisible to outside networks.
|
||||
2. ``/fetch`` requires a Bearer token (constant-time compare); the service
|
||||
refuses to start without ``COURT_FETCH_SHARED_SECRET`` set.
|
||||
3. ``/health`` is unauthenticated and spawns nothing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
# Three ``dirname`` hops from this file: court_fetch_service/ -> legal_mcp/ ->
# the directory that contains the ``legal_mcp`` package. Putting it on
# ``sys.path`` lets the absolute import below resolve.
# NOTE(review): presumably needed because the file is launched as a plain
# script rather than with ``python -m`` — confirm against the pm2 config.
_pkg_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
if _pkg_root not in sys.path:
    sys.path.insert(0, _pkg_root)

from legal_mcp.court_fetch_service import camofox_client  # noqa: E402

logger = logging.getLogger("legal_court_fetch_service")

# Bearer secret compared in ``_check_bearer``; empty until ``main()`` assigns it.
_SHARED_SECRET: str = ""
|
||||
|
||||
|
||||
async def health(request: web.Request) -> web.Response:
    """Unauthenticated liveness endpoint.

    Always answers with a JSON object containing ``ok``, ``service`` and
    ``camofox_enabled``. When camofox is enabled, the result of
    ``camofox_client.health()`` is attached under ``camofox``; if that call
    raises, the error text is attached under ``camofox_error`` instead so the
    endpoint itself never fails because of camofox.
    """
    payload = {
        "ok": True,
        "service": "legal-court-fetch-service",
        "camofox_enabled": camofox_client.is_enabled(),
    }

    # Nothing more to report when the browser backend is switched off.
    if not camofox_client.is_enabled():
        return web.json_response(payload)

    try:
        payload["camofox"] = await camofox_client.health()
    except Exception as exc:  # health must never throw
        payload["camofox_error"] = str(exc)

    return web.json_response(payload)
|
||||
|
||||
|
||||
def _check_bearer(request: web.Request) -> web.Response | None:
    """Validate the ``Authorization: Bearer <secret>`` header of *request*.

    Returns:
        ``None`` when the header matches ``"Bearer " + _SHARED_SECRET``;
        otherwise a 401 JSON response the caller should return as-is.

    The comparison is constant-time and is done on bytes:
    ``hmac.compare_digest`` raises ``TypeError`` for ``str`` arguments that
    contain non-ASCII characters, which would let a malformed header turn
    into an unhandled exception instead of a clean 401.
    """
    supplied = request.headers.get("Authorization", "")

    authorized = False
    # Fail closed: with no configured secret, a bare "Bearer " header must not
    # authenticate (``main()`` refuses to start in that state, but the app can
    # also be built without going through ``main()``).
    if supplied and _SHARED_SECRET:
        expected = "Bearer " + _SHARED_SECRET
        # ``surrogatepass`` keeps the encoding total (never raises) even for
        # header/env strings that carry lone surrogates.
        authorized = hmac.compare_digest(
            supplied.encode("utf-8", "surrogatepass"),
            expected.encode("utf-8", "surrogatepass"),
        )

    if authorized:
        return None
    return web.json_response(
        {"error": "unauthorized: missing or invalid Bearer token"}, status=401
    )
|
||||
|
||||
|
||||
async def fetch(request: web.Request) -> web.Response:
    """Handle ``POST /fetch``: fetch one verdict through ``camofox_client``.

    The request must carry a valid Bearer token and a JSON object body with
    truthy ``file_number``, ``month`` and ``year``; ``case_number`` and
    ``court`` are optional and default to ``""``.

    Responses:
        401 — Bearer check failed (response produced by ``_check_bearer``).
        400 — body is not valid JSON, is not a JSON object, or a required
              field is missing/empty.
        200 ``{"ok": True, content_b64, filename, source_url, court}`` on
              success; ``content_b64`` is the base64 of ``result["content"]``.
        200 ``{"ok": False, "reason": ...}`` on any fetch failure, so the
              caller can inspect ``reason`` rather than an HTTP error.
    """
    unauth = _check_bearer(request)
    if unauth is not None:
        return unauth

    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError derives from ValueError; catching the base
        # class also covers a body that cannot be decoded as text
        # (UnicodeDecodeError), which would otherwise surface as a 500.
        return web.json_response({"error": "invalid JSON body"}, status=400)

    # Valid JSON that is not an object (list, string, number, null) has no
    # ``.get`` — reject it explicitly instead of crashing below.
    if not isinstance(body, dict):
        return web.json_response(
            {"error": "invalid JSON body: expected an object"}, status=400
        )

    required = ("file_number", "month", "year")
    if not all(body.get(k) for k in required):
        return web.json_response(
            {"ok": False, "reason": f"missing one of {required}"}, status=400
        )

    def _optional(key: str) -> str:
        # A JSON ``null`` must become "", not the literal string "None".
        value = body.get(key)
        return "" if value is None else str(value)

    try:
        result = await camofox_client.fetch_admin_verdict(
            file_number=str(body["file_number"]),
            month=str(body["month"]),
            year=str(body["year"]),
            case_number=_optional("case_number"),
            court=_optional("court"),
        )
        return web.json_response({
            "ok": True,
            "content_b64": base64.b64encode(result["content"]).decode("ascii"),
            "filename": result.get("filename", ""),
            "source_url": result.get("source_url", ""),
            "court": result.get("court", ""),
        })
    except (camofox_client.CamofoxUnavailable, camofox_client.NgcsFlowError) as e:
        # Expected, recoverable failure → orchestrator escalates (INV-CF3).
        return web.json_response({"ok": False, "reason": str(e)}, status=200)
    except Exception as e:  # noqa: BLE001
        # Last-resort boundary: log the traceback, still answer with a
        # structured failure rather than an HTTP 500.
        logger.exception("fetch failed")
        return web.json_response({"ok": False, "reason": f"unexpected: {e}"}, status=200)
|
||||
|
||||
|
||||
def build_app() -> web.Application:
    """Create the aiohttp application and register its two routes.

    The request-body limit is 64 MiB. ``GET /health`` is served by ``health``
    and ``POST /fetch`` by ``fetch``.
    """
    max_body_bytes = 64 * 1024 * 1024
    application = web.Application(client_max_size=max_body_bytes)

    router = application.router
    router.add_get("/health", health)
    router.add_post("/fetch", fetch)

    return application
|
||||
|
||||
|
||||
def main() -> int:
    """Parse CLI arguments, validate the shared secret and run the service.

    CLI options: ``--port`` (default 8771), ``--host`` (default ``10.0.1.1``)
    and ``--log-level`` (default ``INFO``).

    Returns:
        ``2`` when ``COURT_FETCH_SHARED_SECRET`` is unset/blank or shorter
        than the enforced minimum; ``0`` after ``web.run_app`` returns.
    """
    # Enforced lower bound for the secret. The error message below reports
    # this same number so the log matches what the check actually requires.
    min_secret_len = 24

    parser = argparse.ArgumentParser(description="legal-court-fetch-service")
    parser.add_argument("--port", type=int, default=8771)
    parser.add_argument("--host", default="10.0.1.1",
                        help="bind address; default = docker0 bridge gateway")
    parser.add_argument("--log-level", default="INFO")
    args = parser.parse_args()

    logging.basicConfig(level=args.log_level.upper(),
                        format="%(asctime)s %(name)s %(levelname)s %(message)s")

    secret = os.environ.get("COURT_FETCH_SHARED_SECRET", "").strip()
    if not secret:
        logger.error(
            "COURT_FETCH_SHARED_SECRET is empty; refusing to start. Set it in "
            "/home/chaim/.legal-court-fetch-service.env (loaded by pm2) and "
            "mirror it as a Coolify env var on the legal-ai app."
        )
        return 2
    if len(secret) < min_secret_len:
        logger.error(
            "COURT_FETCH_SHARED_SECRET too short (minimum %d chars; "
            ">=32 recommended).",
            min_secret_len,
        )
        return 2

    # Publish the validated secret for ``_check_bearer``.
    global _SHARED_SECRET
    _SHARED_SECRET = secret

    app = build_app()
    logger.info("legal-court-fetch-service listening on %s:%d", args.host, args.port)
    # ``print`` is replaced with a no-op so run_app's startup banner is
    # suppressed; the logger line above is the startup record.
    web.run_app(app, host=args.host, port=args.port, print=lambda _m: None)
    return 0
|
||||
|
||||
|
||||
# Script entry point: the integer returned by ``main()`` becomes the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user