feat(learning): מטא-דאטה מלא להחלטות-פנימיות בקליטה + חילוץ-הלכות אוטומטי
סוגר את הפער שעלה על בל"מ 8126: החלטה שנכנסה לספריית-הפסיקה הופיעה ללא מטא-דאטה (summary/citation/date ריקים, proceeding_type שגוי) כי מחלץ-ה-Gemini מיועד לפסיקה חיצונית ומחזיר no_metadata לפנימיות, והחילוץ-הלכות נשאר pending. web/app.py — `_enroll_final_in_library` עכשיו ממלא **דטרמיניסטית** מהתיק (בלי LLM): - proceeding_type (מהתיק — בל"מ/ערר, גם idempotency key נכון מהקליטה הראשונה), decision_date (fallback ל-hearing_date), subject_tags, summary (=subject). - `citation_formatted` נבנה דטרמיניסטית (`_build_internal_citation`): 'ועדת ערר ... בל"מ <num> <עורר> נ' <משיב> (יו"ר עו"ד <chair>)'. scripts/final_halacha_pipeline.py — שלב [0] חדש: `precedent_extract_halachot` על ההחלטה עצמה (idempotent — מדלג כש-completed/dry-run), כך שהלכות-ההחלטה לא נשארות pending. אומת: py_compile ✓ · ה-pipeline רץ dry-run נקי (4 שלבים). 8126 כבר תוקן ידנית; מכאן זה אוטומטי לכל החלטה. Invariants: INV-LRN4/X11 · G1 (נרמול-במקור) · DM7 · feedback_silent_swallow. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
59
web/app.py
59
web/app.py
@@ -3387,6 +3387,33 @@ def _committee_chair_for_case(case: dict, case_number: str) -> str:
|
||||
return COMMITTEE_CHAIR_BY_PREFIX.get(case_number[:1], COMMITTEE_CHAIR_DEFAULT)
|
||||
|
||||
|
||||
def _party_name(parties) -> str:
    """Return the display name of the first party, or "" when none is usable.

    Args:
        parties: Expected to be a list whose entries are either dicts carrying
            a ``name`` or ``party_name`` key, or plain values that are
            stringified. Anything that is not a non-empty list yields "".

    Returns:
        The stripped name of the FIRST entry only (later entries are never
        consulted). For a dict entry, ``name`` is preferred and ``party_name``
        is the fallback; a key whose value is empty or whitespace-only is
        treated as missing.
    """
    if not isinstance(parties, list) or not parties:
        return ""

    first = parties[0]
    if first is None:
        # str(None) would otherwise leak the literal text "None" to callers.
        return ""

    if isinstance(first, dict):
        for key in ("name", "party_name"):
            value = first.get(key)
            if not value:
                continue
            # str() tolerates non-string values instead of failing on .strip().
            text = str(value).strip()
            if text:
                return text
        return ""

    return str(first).strip()
|
||||
|
||||
|
||||
def _build_internal_citation(
    case: dict, case_number: str, chair_name: str, proceeding_type: str, district: str,
) -> str:
    """Assemble the uniform citation string for one of our own committee decisions.

    The citation is composed purely from the arguments and the case record,
    with no model call. Shape of the result, e.g.:
    'ועדת ערר מחוזית לתכנון ובניה ירושלים, בל"מ 8126-03-25 פלוני נ' הוועדה המקומית (יו"ר עו"ד דפנה תמיר)'.

    Args:
        case: Case record; its ``appellants`` and ``respondents`` entries are
            passed to ``_party_name`` to obtain the first party on each side.
        case_number: Case number, inserted verbatim after the proceeding type.
        chair_name: Chair's name; when falsy the chair clause is omitted.
        proceeding_type: Proceeding label; when falsy it defaults to "ערר".
        district: District name appended to the committee title.

    Returns:
        The citation with surrounding whitespace stripped. The parties clause
        is "<appellant> נ' <respondent>" when both names exist, the appellant
        alone when only it exists, and is left out otherwise.
    """
    first_appellant = _party_name(case.get("appellants"))
    first_respondent = _party_name(case.get("respondents"))

    # Parties clause: both sides, appellant only, or nothing at all.
    if first_appellant and first_respondent:
        parties_part = f" {first_appellant} נ' {first_respondent}"
    elif first_appellant:
        parties_part = f" {first_appellant}"
    else:
        parties_part = ""

    proceeding_label = proceeding_type if proceeding_type else "ערר"

    if chair_name:
        chair_part = f' (יו"ר עו"ד {chair_name})'
    else:
        chair_part = ""

    pieces = [
        f"ועדת ערר מחוזית לתכנון ובניה {district}, {proceeding_label} {case_number}",
        parties_part,
        chair_part,
    ]
    return "".join(pieces).strip()
|
||||
|
||||
|
||||
async def _enroll_final_in_library(
|
||||
case: dict, case_number: str, final_text: str, chair_name: str,
|
||||
) -> dict:
|
||||
@@ -3404,12 +3431,23 @@ async def _enroll_final_in_library(
|
||||
if not final_text.strip():
|
||||
out["error"] = "no final text extracted"
|
||||
return out
|
||||
|
||||
# Deterministic metadata from the case record — the Gemini metadata extractor is
|
||||
# tuned for EXTERNAL rulings and returns no_metadata for internal decisions, so we
|
||||
# populate proceeding_type / date / tags / summary / citation ourselves (no LLM).
|
||||
district = "ירושלים"
|
||||
proceeding_type = (case.get("proceeding_type") or "ערר").strip()
|
||||
decision_date = case.get("decision_date") or case.get("hearing_date")
|
||||
subject_tags = case.get("subject_categories") or []
|
||||
summary = (case.get("subject") or case.get("title") or "").strip()
|
||||
try:
|
||||
res = await int_svc.ingest_internal_decision(
|
||||
case_number=case_number, case_name=case.get("title", ""),
|
||||
decision_date=case.get("decision_date"), chair_name=chair_name,
|
||||
district="ירושלים", practice_area=case.get("practice_area", ""),
|
||||
appeal_subtype=case.get("appeal_subtype", ""), text=final_text,
|
||||
decision_date=decision_date, chair_name=chair_name,
|
||||
district=district, practice_area=case.get("practice_area", ""),
|
||||
appeal_subtype=case.get("appeal_subtype", ""),
|
||||
proceeding_type=proceeding_type, subject_tags=subject_tags,
|
||||
summary=summary, text=final_text,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("library enrollment failed for %s: %s", case_number, e)
|
||||
@@ -3421,6 +3459,21 @@ async def _enroll_final_in_library(
|
||||
if not case_law_id:
|
||||
return out
|
||||
|
||||
# Build the uniform citation deterministically (Gemini won't, for internal rows).
|
||||
try:
|
||||
citation = _build_internal_citation(
|
||||
case, case_number, chair_name, proceeding_type, district)
|
||||
pool = await db.get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
await conn.execute(
|
||||
"UPDATE case_law SET citation_formatted = $2 "
|
||||
"WHERE id = $1 AND COALESCE(citation_formatted, '') = ''",
|
||||
UUID(case_law_id), citation,
|
||||
)
|
||||
out["citation"] = citation
|
||||
except Exception as e:
|
||||
logger.warning("citation build failed for %s: %s", case_number, e)
|
||||
|
||||
# The precedents this decision cites → link to the library; flag the ones not found.
|
||||
try:
|
||||
await cit_tools.extract_internal_citations(case_law_id=case_law_id, limit=0)
|
||||
|
||||
Reference in New Issue
Block a user