feat(plans): העשרה-אוטומטית של תוקף-תב"ע מ-mavat בחילוץ (Phase C טריגר 2)
כש-extract_plans מוצא מספר-תכנית עם תוקף-חסר (תאריך-רשומות / י"פ),
upsert_candidates ממלא את החוסר מ-מנהל-התכנון לפני ה-upsert. הרשומה
עדיין נכנסת pending_review — ההעשרה משנה את המועמד, לא את שער-היו"ר.
שמרני בכוונה:
- ממלא רק שדות-חסרים — לא דורס ערכים מעוגני-תיק (display_name/purpose
מהחילוץ נשמרים).
- מגודר לפורמט-mavat מודרני (\d{2,4}-\d{6,8}); מספרים-ישנים (מי/820,
תמ"א 38) מדולגים (לא יבזבזו השקת-דפדפן).
- תקרה PLAN_ENRICH_MAX_PER_CALL=8 (מתועד אם נחצה — בלי silent-cap).
- fail-soft: גשר-למטה / לא-נמצא / חסום → המועמד נשאר כפי-שחולץ (לוג,
לא בליעה שקטה).
- דגל-כיבוי PLAN_ENRICH_FROM_MAVAT=0.
- מקור-ההעשרה מסומן ב-model_used="claude_local+mavat".
INV-AH: ערך-תוקף שנמשך נושא מקור (mavat); שדה-חסר נשאר ריק. G10: שער-
היו"ר נשמר. G2: מרחיב את plans_fetch (#292), לא מסלול מקביל.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,8 @@ claude_session המקומי בלבד (כמו שאר המחלצים) — לא Ant
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from legal_mcp.services import claude_session, db
|
from legal_mcp.services import claude_session, db
|
||||||
@@ -23,6 +25,18 @@ logger = logging.getLogger(__name__)
|
|||||||
# not a pinned model id — the session model is whatever is configured).
|
# not a pinned model id — the session model is whatever is configured).
|
||||||
MODEL_TAG = "claude_local"
|
MODEL_TAG = "claude_local"
|
||||||
|
|
||||||
|
# ── mavat auto-enrichment (Phase C trigger 2) ──────────────────────────────────
# When an extracted candidate is missing its validity (gazette_date / yalkut), we
# fill the gaps from the official source (mavat) via the host bridge. Conservative
# by design: only modern numeric plan numbers resolve on mavat search, each fetch
# drives a real browser (~30-60s, serial), so we gate by format + cap per call and
# fail soft.
#
# Kill switch: PLAN_ENRICH_FROM_MAVAT set to "0", "false" (any letter case) or an
# empty string disables enrichment; unset or any other value leaves it enabled.
# The comparison is case-insensitive so "False" / "FALSE" also switch it off.
_ENRICH_ENABLED = (
    os.environ.get("PLAN_ENRICH_FROM_MAVAT", "1").strip().lower()
    not in ("0", "false", "")
)
# Per-call ceiling for enrichment. A non-integer value raises ValueError at import.
_ENRICH_MAX_PER_CALL = int(os.environ.get("PLAN_ENRICH_MAX_PER_CALL", "8"))
# mavat search resolves the modern "<2-4 digits>-<6-8 digits>" identifiers; legacy
# forms (מי/820, 5166/ב, תמ"א 38) don't, so don't waste a browser launch on them.
_MAVAT_NUM_RE = re.compile(r"^\d{2,4}-\d{6,8}$")
|
||||||
|
|
||||||
|
|
||||||
EXTRACT_PLANS_PROMPT = """אתה מחלץ מידע עובדתי על תכניות בניין-עיר (תב"ע) עבור מרשם-תכניות של ועדת ערר.
|
EXTRACT_PLANS_PROMPT = """אתה מחלץ מידע עובדתי על תכניות בניין-עיר (תב"ע) עבור מרשם-תכניות של ועדת ערר.
|
||||||
|
|
||||||
@@ -112,16 +126,69 @@ async def extract_plans_from_text(text: str) -> list[dict]:
|
|||||||
return candidates
|
return candidates
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_enrichment(c: dict) -> bool:
    """Decide whether a candidate should be sent to mavat for enrichment.

    Returns True only when enrichment is switched on, the candidate's stripped
    ``plan_number`` matches the mavat identifier pattern, and at least one of
    ``gazette_date`` / ``yalkut_number`` is missing or empty.
    """
    # Check the switch first so a disabled feature never touches the candidate.
    if not _ENRICH_ENABLED:
        return False
    plan_number = (c.get("plan_number") or "").strip()
    if _MAVAT_NUM_RE.match(plan_number) is None:
        return False
    has_full_validity = bool(c.get("gazette_date")) and bool(c.get("yalkut_number"))
    return not has_full_validity
|
||||||
|
|
||||||
|
|
||||||
|
async def _enrich_from_mavat(c: dict) -> tuple[dict, bool]:
    """Fill a candidate's MISSING fields from mavat; never override existing values.

    Args:
        c: Extracted candidate. Must carry a string ``plan_number``.

    Returns:
        ``(candidate, enriched)``. When at least one field was filled, the
        candidate is a shallow copy of ``c`` with those fields added and
        ``enriched`` is True. Otherwise the original ``c`` is returned untouched
        and ``enriched`` is False.

    Fails soft: a bridge-down / not-found / blocked / unexpected fetch failure,
    or an empty fetch result, is logged and leaves the candidate as extracted.
    """
    from legal_mcp.services import plans_fetch

    num = c["plan_number"].strip()
    try:
        fetched = await plans_fetch.fetch_plan(num)
    except plans_fetch.PlanFetchUnavailable as e:
        logger.info("plan-enrich: bridge unavailable for %s — %s", num, e)
        return c, False
    except plans_fetch.PlanFetchError as e:
        logger.info("plan-enrich: mavat had no usable result for %s — %s", num, e)
        return c, False
    except Exception as e:  # noqa: BLE001 — never let enrichment break extraction
        # Keep the traceback: an unexpected failure is the one case worth debugging.
        logger.warning(
            "plan-enrich: unexpected error for %s — %s", num, e, exc_info=True
        )
        return c, False

    # An empty/None result would otherwise raise on ``.get`` below, outside the
    # try block, and break the fail-soft contract.
    if not fetched:
        logger.info("plan-enrich: mavat returned no data for %s", num)
        return c, False

    enriched = dict(c)
    filled: list[str] = []
    for f in ("gazette_date", "yalkut_number", "display_name", "plan_type", "purpose"):
        # Only fill gaps: a value already present on the candidate always wins.
        if not enriched.get(f) and fetched.get(f):
            enriched[f] = fetched[f]
            filled.append(f)
    if filled:
        logger.info(
            "plan-enrich: %s filled %s from mavat (%s)",
            num,
            ",".join(filled),
            fetched.get("source_url", ""),
        )
        return enriched, True
    return c, False
|
||||||
|
|
||||||
|
|
||||||
async def upsert_candidates(
|
async def upsert_candidates(
|
||||||
candidates: list[dict],
|
candidates: list[dict],
|
||||||
*,
|
*,
|
||||||
source_case_number: str = "",
|
source_case_number: str = "",
|
||||||
source_document_id: UUID | None = None,
|
source_document_id: UUID | None = None,
|
||||||
model_used: str = MODEL_TAG,
|
model_used: str = MODEL_TAG,
|
||||||
|
enrich: bool = True,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Upsert extracted candidates into the registry as pending_review (G10)."""
|
"""Upsert extracted candidates into the registry as pending_review (G10).
|
||||||
|
|
||||||
|
When ``enrich`` (default) and a candidate's validity is incomplete, its
|
||||||
|
missing fields are pulled from mavat first (capped per call). The row still
|
||||||
|
enters pending_review — enrichment changes the candidate, not the chair gate.
|
||||||
|
"""
|
||||||
out: list[dict] = []
|
out: list[dict] = []
|
||||||
|
enriched_count = 0
|
||||||
for c in candidates:
|
for c in candidates:
|
||||||
|
used = model_used
|
||||||
|
if enrich and enriched_count < _ENRICH_MAX_PER_CALL and _needs_enrichment(c):
|
||||||
|
c, did = await _enrich_from_mavat(c)
|
||||||
|
if did:
|
||||||
|
enriched_count += 1
|
||||||
|
used = f"{model_used}+mavat"
|
||||||
try:
|
try:
|
||||||
plan = await db.upsert_plan(
|
plan = await db.upsert_plan(
|
||||||
plan_number=c["plan_number"],
|
plan_number=c["plan_number"],
|
||||||
@@ -133,12 +200,18 @@ async def upsert_candidates(
|
|||||||
review_status="pending_review",
|
review_status="pending_review",
|
||||||
source_case_number=source_case_number,
|
source_case_number=source_case_number,
|
||||||
source_document_id=source_document_id,
|
source_document_id=source_document_id,
|
||||||
model_used=model_used,
|
model_used=used,
|
||||||
)
|
)
|
||||||
out.append(plan)
|
out.append(plan)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
# Don't swallow — surface the bad candidate so it isn't silently dropped.
|
# Don't swallow — surface the bad candidate so it isn't silently dropped.
|
||||||
logger.warning("upsert_candidates: skipped %r — %s", c.get("plan_number"), e)
|
logger.warning("upsert_candidates: skipped %r — %s", c.get("plan_number"), e)
|
||||||
|
if enrich and enriched_count >= _ENRICH_MAX_PER_CALL:
|
||||||
|
logger.warning(
|
||||||
|
"plan-enrich: hit the per-call cap (%d) — remaining candidates kept "
|
||||||
|
"as-extracted (no silent truncation; raise PLAN_ENRICH_MAX_PER_CALL).",
|
||||||
|
_ENRICH_MAX_PER_CALL,
|
||||||
|
)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user