feat(plans): העשרה-אוטומטית של תוקף-תב"ע מ-mavat בחילוץ (Phase C טריגר 2)
כש-extract_plans מוצא מספר-תכנית עם תוקף-חסר (תאריך-רשומות / י"פ),
upsert_candidates ממלא את החוסר מ-מנהל-התכנון לפני ה-upsert. הרשומה
עדיין נכנסת pending_review — ההעשרה משנה את המועמד, לא את שער-היו"ר.
שמרני בכוונה:
- ממלא רק שדות-חסרים — לא דורס ערכים מעוגני-תיק (display_name/purpose
מהחילוץ נשמרים).
- מגודר לפורמט-mavat מודרני (\d{2,4}-\d{6,8}); מספרים-ישנים (מי/820,
תמ"א 38) מדולגים (לא יבזבזו השקת-דפדפן).
- תקרה PLAN_ENRICH_MAX_PER_CALL=8 (מתועד אם נחצה — בלי silent-cap).
- fail-soft: גשר-למטה / לא-נמצא / חסום → המועמד נשאר כפי-שחולץ (לוג,
לא בליעה שקטה).
- דגל-כיבוי PLAN_ENRICH_FROM_MAVAT=0.
- מקור-ההעשרה מסומן ב-model_used="claude_local+mavat".
INV-AH: ערך-תוקף שנמשך נושא מקור (mavat); שדה-חסר נשאר ריק. G10: שער-
היו"ר נשמר. G2: מרחיב את plans_fetch (#292), לא מסלול מקביל.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,8 @@ claude_session המקומי בלבד (כמו שאר המחלצים) — לא Ant
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import claude_session, db
|
||||
@@ -23,6 +25,18 @@ logger = logging.getLogger(__name__)
|
||||
# not a pinned model id — the session model is whatever is configured).
|
||||
MODEL_TAG = "claude_local"
|
||||
|
||||
# ── mavat auto-enrichment (Phase C trigger 2) ──────────────────────────────────
# When an extracted candidate is missing its validity (gazette_date / yalkut), we
# fill the gaps from the official source (mavat) via the host bridge. Conservative
# by design: only modern numeric plan numbers resolve on mavat search, each fetch
# drives a real browser (~30-60s, serial), so we gate by format + cap per call and
# fail soft. Set PLAN_ENRICH_FROM_MAVAT=0 to disable.
_ENRICH_DEFAULT_MAX_PER_CALL = 8
_FALSY_ENV_VALUES = frozenset({"0", "false", "no", "off", ""})


def _env_flag(name: str, *, default: bool) -> bool:
    """Read a boolean switch from the environment.

    An unset variable yields ``default``. A set variable is compared
    case-insensitively (after stripping whitespace) against the falsy spellings
    "0", "false", "no", "off" and the empty string; anything else enables.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in _FALSY_ENV_VALUES


def _env_int(name: str, default: int) -> int:
    """Read a non-negative integer from the environment.

    A missing or malformed value falls back to ``default`` (with a warning for
    the malformed case) instead of raising at import time; negative values are
    clamped to 0.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        value = int(raw.strip())
    except ValueError:
        # A typo in deployment config must not make the module unimportable.
        logging.getLogger(__name__).warning(
            "invalid %s=%r — falling back to %d", name, raw, default
        )
        return default
    return max(0, value)


_ENRICH_ENABLED = _env_flag("PLAN_ENRICH_FROM_MAVAT", default=True)
_ENRICH_MAX_PER_CALL = _env_int("PLAN_ENRICH_MAX_PER_CALL", _ENRICH_DEFAULT_MAX_PER_CALL)
# mavat search resolves the modern "NN-NNNNNNN" identifiers; legacy forms
# (מי/820, 5166/ב, תמ"א 38) don't, so don't waste a browser launch on them.
_MAVAT_NUM_RE = re.compile(r"^\d{2,4}-\d{6,8}$")
|
||||
|
||||
|
||||
EXTRACT_PLANS_PROMPT = """אתה מחלץ מידע עובדתי על תכניות בניין-עיר (תב"ע) עבור מרשם-תכניות של ועדת ערר.
|
||||
|
||||
@@ -112,16 +126,69 @@ async def extract_plans_from_text(text: str) -> list[dict]:
|
||||
return candidates
|
||||
|
||||
|
||||
def _needs_enrichment(c: dict) -> bool:
    """Decide whether a candidate should be sent to mavat for enrichment.

    Returns True only when all of the following hold:
    - the module-level enrichment switch is on;
    - the candidate's ``plan_number`` (stripped; a missing/None value is
      treated as empty) matches the modern mavat identifier pattern;
    - at least one of ``gazette_date`` / ``yalkut_number`` is missing or empty.
    """
    if not _ENRICH_ENABLED:
        return False

    plan_number = (c.get("plan_number") or "").strip()
    if not _MAVAT_NUM_RE.match(plan_number):
        # Legacy-format numbers are not looked up on mavat.
        return False

    validity_complete = bool(c.get("gazette_date")) and bool(c.get("yalkut_number"))
    return not validity_complete
|
||||
|
||||
|
||||
async def _enrich_from_mavat(c: dict) -> tuple[dict, bool]:
    """Fill a candidate's MISSING fields from mavat, never overriding existing ones.

    Args:
        c: Extracted candidate; must contain a string ``plan_number`` (callers
            are expected to have checked it with ``_needs_enrichment``).

    Returns:
        ``(candidate, enriched)``. When at least one field was filled, a shallow
        copy of ``c`` with the filled fields is returned together with ``True``;
        otherwise the original ``c`` object is returned with ``False``.

    Fails soft: a bridge-down / not-found / blocked fetch, an unexpected
    exception from the fetch, or an empty result all leave the candidate
    untouched, and each case is logged rather than silently swallowed.
    """
    # Imported lazily, as in the original code, so the fetch module is only
    # loaded when enrichment actually runs.
    from legal_mcp.services import plans_fetch

    num = c["plan_number"].strip()
    try:
        fetched = await plans_fetch.fetch_plan(num)
    except plans_fetch.PlanFetchUnavailable as e:
        logger.info("plan-enrich: bridge unavailable for %s — %s", num, e)
        return c, False
    except plans_fetch.PlanFetchError as e:
        logger.info("plan-enrich: mavat had no usable result for %s — %s", num, e)
        return c, False
    except Exception as e:  # noqa: BLE001 — never let enrichment break extraction
        # Keep the traceback: an unexpected failure here is worth diagnosing.
        logger.warning("plan-enrich: unexpected error for %s — %s", num, e, exc_info=True)
        return c, False

    if not fetched:
        # A None/empty result would otherwise raise on ``.get`` below (outside
        # the try) and break the fail-soft contract.
        logger.info("plan-enrich: mavat returned an empty result for %s", num)
        return c, False

    enriched = dict(c)  # copy, so the caller's candidate is never mutated
    filled: list[str] = []
    for f in ("gazette_date", "yalkut_number", "display_name", "plan_type", "purpose"):
        # Only fill gaps; a value already present in the candidate wins.
        if not enriched.get(f) and fetched.get(f):
            enriched[f] = fetched[f]
            filled.append(f)

    if not filled:
        return c, False

    logger.info(
        "plan-enrich: %s filled %s from mavat (%s)",
        num, ",".join(filled), fetched.get("source_url", ""),
    )
    return enriched, True
|
||||
|
||||
|
||||
async def upsert_candidates(
|
||||
candidates: list[dict],
|
||||
*,
|
||||
source_case_number: str = "",
|
||||
source_document_id: UUID | None = None,
|
||||
model_used: str = MODEL_TAG,
|
||||
enrich: bool = True,
|
||||
) -> list[dict]:
|
||||
"""Upsert extracted candidates into the registry as pending_review (G10)."""
|
||||
"""Upsert extracted candidates into the registry as pending_review (G10).
|
||||
|
||||
When ``enrich`` (default) and a candidate's validity is incomplete, its
|
||||
missing fields are pulled from mavat first (capped per call). The row still
|
||||
enters pending_review — enrichment changes the candidate, not the chair gate.
|
||||
"""
|
||||
out: list[dict] = []
|
||||
enriched_count = 0
|
||||
for c in candidates:
|
||||
used = model_used
|
||||
if enrich and enriched_count < _ENRICH_MAX_PER_CALL and _needs_enrichment(c):
|
||||
c, did = await _enrich_from_mavat(c)
|
||||
if did:
|
||||
enriched_count += 1
|
||||
used = f"{model_used}+mavat"
|
||||
try:
|
||||
plan = await db.upsert_plan(
|
||||
plan_number=c["plan_number"],
|
||||
@@ -133,12 +200,18 @@ async def upsert_candidates(
|
||||
review_status="pending_review",
|
||||
source_case_number=source_case_number,
|
||||
source_document_id=source_document_id,
|
||||
model_used=model_used,
|
||||
model_used=used,
|
||||
)
|
||||
out.append(plan)
|
||||
except ValueError as e:
|
||||
# Don't swallow — surface the bad candidate so it isn't silently dropped.
|
||||
logger.warning("upsert_candidates: skipped %r — %s", c.get("plan_number"), e)
|
||||
if enrich and enriched_count >= _ENRICH_MAX_PER_CALL:
|
||||
logger.warning(
|
||||
"plan-enrich: hit the per-call cap (%d) — remaining candidates kept "
|
||||
"as-extracted (no silent truncation; raise PLAN_ENRICH_MAX_PER_CALL).",
|
||||
_ENRICH_MAX_PER_CALL,
|
||||
)
|
||||
return out
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user