feat(plans): משיכת תב"ע מ-מנהל-התכנון (mavat) — Phase C backend-slice
ליבת-המשיכה למרשם-התכניות (V38): מספר-תכנית → זהות+תוקף מ-mavat דרך גשר-Camoufox הקיים (G2 — אותו שירות/פורט/סוד כמו X13, בלי חדשים). - court_fetch_service/mavat_client.py (חדש): דרייבר Camoufox מול mavat — עוקף F5-ASM (דפדפן-JS), search→auto-nav ל-SV4, לוכד GET /rest/api/SV4/1, מפענח planDetails (E_NAME/AUTH/ENTITY_SUBTYPE/GOALS) + rsInternet (פרסום-לאישור→ED_PUBLICATION_FILE=י"פ + DETAILS→תאריך/עמוד). מלכודת- דרייבר: init-script window.onerror swallow. reCAPTCHA נשאר דלוק (token). - court_fetch_service/server.py: POST /plan-fetch (אותו Bearer). - services/plans_fetch.py (חדש): צד-קונטיינר — httpx לגשר, מנרמל שדות. - tools/plans.py + server.py: כלי-MCP plan_fetch (מועמד, לא כותב). - web/app.py: POST /api/plans/fetch (503 גשר-למטה, 404 לא-נמצא). אומת חי מול mavat: 101-1031020→י"פ 13697 (עמ' 8758, 30/07/2025), 101-1053933→י"פ 13836. מקור-אמת עשיר מתב"ע-עכשיו (שחסר י"פ). INV-AH: כל ערך נושא source_url; שדה-חסר ריק לא מומצא. G10: מחזיר מועמד בלבד — שער-יו"ר (review_status) נשמר. G2: מרחיב גשר+מרשם קיימים. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
95
mcp-server/src/legal_mcp/services/plans_fetch.py
Normal file
95
mcp-server/src/legal_mcp/services/plans_fetch.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Pull תב"ע identity + validity from mavat (מנהל התכנון) — container/MCP side.

The thin container-side half of the mavat plan fetcher. The actual browser work
happens on the **host** (`court_fetch_service` + `mavat_client`, Camoufox over
Xvfb) because mavat sits behind an F5 BIG-IP ASM bot-wall that only a real
JS-executing browser clears — a scripted httpx from the container gets a
302→maintenance. This module just calls that host bridge over the docker0
loopback (same bridge, secret and bind as X13 court-fetch — G2: no second
service/port/secret) and normalises the result into registry fields.

INV-AH: every pulled value carries `source_url` (the mavat plan page); a field
the source doesn't expose (notably yalkut on some plans) comes back empty rather
than guessed. The chair still gates the row (review_status) before block-ט cites
it — this fetcher never writes the registry, it only returns a candidate dict.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)

# Same host bridge as X13 (pm2 `legal-court-fetch-service`, docker0 gateway). The
# container and the host MCP server both reach 10.0.1.1:8771; the secret is the
# shared COURT_FETCH_SHARED_SECRET (Coolify env on the container).
# A trailing slash is dropped so that f"{_SERVICE_URL}/plan-fetch" never turns
# into ".../​/plan-fetch" when the env value is written as "http://host:port/".
_SERVICE_URL = os.environ.get(
    "COURT_FETCH_SERVICE_URL", "http://10.0.1.1:8771"
).rstrip("/")
_SHARED_SECRET = os.environ.get("COURT_FETCH_SHARED_SECRET", "").strip()

# mavat is slow (F5 challenge + SPA hydration + SV4); give the browser room but
# stay under the host driver's own hard cap.
_DEFAULT_TIMEOUT_S = 180.0
try:
    # An empty value falls back to the default; float("") would raise.
    _TIMEOUT_S = float(
        os.environ.get("PLAN_FETCH_TIMEOUT_S", "").strip() or _DEFAULT_TIMEOUT_S
    )
except ValueError:
    # A malformed override must not take the whole module down at import time.
    logger.warning(
        "PLAN_FETCH_TIMEOUT_S=%r is not a number; using %.0fs",
        os.environ.get("PLAN_FETCH_TIMEOUT_S"),
        _DEFAULT_TIMEOUT_S,
    )
    _TIMEOUT_S = _DEFAULT_TIMEOUT_S

# The fields the bridge returns and we surface to the form / upsert.
_PLAN_FIELDS = (
    "plan_number", "display_name", "plan_type", "purpose",
    "gazette_date", "yalkut_number", "yalkut_page", "source_url",
)
|
||||
|
||||
|
||||
class PlanFetchUnavailable(RuntimeError):
    """The host browser bridge isn't reachable or isn't configured.

    Raised by ``fetch_plan`` when the shared secret is unset, when the HTTP
    call to the bridge fails at the transport level, or when the bridge
    rejects the secret with a 401 — i.e. mavat itself was never consulted.
    """
|
||||
|
||||
|
||||
class PlanFetchError(RuntimeError):
    """The bridge answered, but no usable plan came back.

    Raised by ``fetch_plan`` for an empty plan number, a non-200 bridge
    status, a response whose ``ok`` flag is falsy, or a plan that lacks
    ``source_url``.
    """
|
||||
|
||||
|
||||
async def fetch_plan(plan_number: str) -> dict:
    """Fetch one plan's metadata from mavat via the host bridge.

    Args:
        plan_number: The plan number to look up. Surrounding whitespace is
            stripped; ``None`` or blank input is rejected.

    Returns:
        A dict with exactly the keys in ``_PLAN_FIELDS``. Values the bridge
        omits or returns falsy become ``""`` (never invented). ``plan_number``
        falls back to the requested number, and ``source_url`` is guaranteed
        to be non-empty.

    Raises:
        PlanFetchUnavailable: The shared secret is unset, the HTTP call to the
            bridge failed (connection or any other ``httpx.HTTPError``), or the
            bridge answered 401.
        PlanFetchError: The plan number is empty, the bridge answered with a
            non-200 status, the body is not a JSON object, ``ok`` is falsy, or
            the plan carries no ``source_url``.
    """
    plan_number = (plan_number or "").strip()
    if not plan_number:
        raise PlanFetchError("חסר מספר-תכנית")
    if not _SHARED_SECRET:
        raise PlanFetchUnavailable(
            "COURT_FETCH_SHARED_SECRET אינו מוגדר — לא ניתן לפנות לשירות-המשיכה."
        )

    headers = {"Authorization": f"Bearer {_SHARED_SECRET}"}
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT_S) as client:
            resp = await client.post(
                f"{_SERVICE_URL}/plan-fetch",
                json={"plan_number": plan_number},
                headers=headers,
            )
    except httpx.ConnectError as exc:
        raise PlanFetchUnavailable(
            f"שירות-המשיכה (legal-court-fetch-service) אינו זמין ב-{_SERVICE_URL}: {exc}"
        ) from exc
    except httpx.HTTPError as exc:
        raise PlanFetchUnavailable(f"שגיאת-תקשורת לשירות-המשיכה: {exc}") from exc

    if resp.status_code == 401:
        raise PlanFetchUnavailable(
            "שירות-המשיכה דחה את הסוד (401) — בדוק drift של COURT_FETCH_SHARED_SECRET."
        )
    if resp.status_code != 200:
        raise PlanFetchError(f"שירות-המשיכה החזיר {resp.status_code}: {resp.text[:200]}")

    # A 200 with a non-JSON or non-object body must surface as PlanFetchError,
    # not as a raw ValueError/AttributeError that callers don't expect.
    bad_body = f"שירות-המשיכה החזיר תשובה שאינה JSON תקין: {resp.text[:200]}"
    try:
        body = resp.json()
    except ValueError as exc:  # json.JSONDecodeError / UnicodeDecodeError
        raise PlanFetchError(bad_body) from exc
    if not isinstance(body, dict):
        raise PlanFetchError(bad_body)

    if not body.get("ok"):
        raise PlanFetchError(body.get("reason") or "התכנית לא נמצאה ב-מנהל-התכנון")

    plan = body.get("plan")
    if not isinstance(plan, dict):
        # Missing or malformed plan payload: treat as empty so the
        # source_url check below rejects it.
        plan = {}

    # Normalise to exactly our fields; keep source_url mandatory (INV-AH).
    out = {field: (plan.get(field) or "") for field in _PLAN_FIELDS}
    out["plan_number"] = out["plan_number"] or plan_number
    if not out["source_url"]:
        raise PlanFetchError("התקבלה תכנית ללא source_url — נדחה (INV-AH).")
    return out
|
||||
Reference in New Issue
Block a user