From 2f439603538260a781f885aa5718804f30db0781 Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 8 Jun 2026 15:19:14 +0000 Subject: [PATCH] =?UTF-8?q?feat(learning):=20=D7=9E=D7=98=D7=90-=D7=93?= =?UTF-8?q?=D7=90=D7=98=D7=94=20=D7=9E=D7=9C=D7=90=20=D7=9C=D7=94=D7=97?= =?UTF-8?q?=D7=9C=D7=98=D7=95=D7=AA-=D7=A4=D7=A0=D7=99=D7=9E=D7=99=D7=95?= =?UTF-8?q?=D7=AA=20=D7=91=D7=A7=D7=9C=D7=99=D7=98=D7=94=20+=20=D7=97?= =?UTF-8?q?=D7=99=D7=9C=D7=95=D7=A5-=D7=94=D7=9C=D7=9B=D7=95=D7=AA=20?= =?UTF-8?q?=D7=90=D7=95=D7=98=D7=95=D7=9E=D7=98=D7=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit סוגר את הפער שעלה על בל"מ 8126: החלטה שנכנסה לספריית-הפסיקה הופיעה ללא מטא-דאטה (summary/citation/date ריקים, proceeding_type שגוי) כי מחלץ-ה-Gemini מיועד לפסיקה חיצונית ומחזיר no_metadata לפנימיות, והחילוץ-הלכות נשאר pending. web/app.py — `_enroll_final_in_library` עכשיו ממלא **דטרמיניסטית** מהתיק (בלי LLM): - proceeding_type (מהתיק — בל"מ/ערר, גם idempotency key נכון מהקליטה הראשונה), decision_date (fallback ל-hearing_date), subject_tags, summary (=subject). - `citation_formatted` נבנה דטרמיניסטית (`_build_internal_citation`): 'ועדת ערר ... בל"מ <עורר> נ' <משיב> (יו"ר עו"ד )'. scripts/final_halacha_pipeline.py — שלב [0] חדש: `precedent_extract_halachot` על ההחלטה עצמה (idempotent — מדלג כש-completed/dry-run), כך שהלכות-ההחלטה לא נשארות pending. אומת: py_compile ✓ · ה-pipeline רץ dry-run נקי (4 שלבים). 8126 כבר תוקן ידנית; מכאן זה אוטומטי לכל החלטה. Invariants: INV-LRN4/X11 · G1 (נרמול-במקור) · DM7 · feedback_silent_swallow. 
async def _decision_law_row(case_number: str) -> dict | None:
    """Look up the case's own decision row in ``case_law``.

    Selects ``id`` and ``halacha_extraction_status`` for the newest row
    (``ORDER BY created_at DESC LIMIT 1``) whose ``case_number`` matches and
    whose ``source_kind`` is ``'internal_committee'``.

    Returns:
        The row converted to a plain ``dict``, or ``None`` when the lookup
        comes back empty (i.e. the decision is not enrolled yet).
    """
    query = (
        "SELECT id, halacha_extraction_status FROM case_law WHERE case_number = $1 "
        "AND source_kind = 'internal_committee' ORDER BY created_at DESC LIMIT 1"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, case_number)
    if not record:
        return None
    return dict(record)
Idempotent: skip when already completed or on dry-run. + row = await _decision_law_row(case_number) + if not row: + print(f"[0/4] ההחלטה {case_number} אינה ב-case_law עדיין — דילוג על חילוץ-הלכות") + elif row.get("halacha_extraction_status") == "completed": + print(f"[0/4] חילוץ-הלכות מההחלטה — דולג (כבר completed)") + elif args.dry_run: + print(f"[0/4] חילוץ-הלכות מההחלטה — מדולג (dry-run)") + else: + print(f"[0/4] precedent_extract_halachot (החלטה {case_number})…", flush=True) + try: + raw0 = await precedent_extract_halachot(str(row["id"])) + d0 = json.loads(raw0).get("data", {}) + print(f" ✓ status={d0.get('status')} stored={d0.get('stored', d0.get('extracted'))}") + except Exception as e: + print(f" ⚠ halacha extraction failed (non-fatal): {e}") + # [1] citation graph - print(f"[1/3] extract_internal_citations (chair={chair})…", flush=True) + print(f"[1/4] extract_internal_citations (chair={chair})…", flush=True) raw = await extract_internal_citations(chair_name=chair, limit=0) try: d = json.loads(raw).get("data", {}) @@ -57,9 +90,9 @@ async def main(args: argparse.Namespace) -> int: # [2] corroboration signal + policy (whole corpus backfill) — skipped on dry-run if args.dry_run: - print("[2/3] corroboration_rebuild — מדולג (dry-run)") + print("[2/4] corroboration_rebuild — מדולג (dry-run)") else: - print("[2/3] corroboration_rebuild (backfill)…", flush=True) + print("[2/4] corroboration_rebuild (backfill)…", flush=True) try: cr = await corroboration.build_all() print(f" ✓ {cr}") @@ -68,7 +101,7 @@ async def main(args: argparse.Namespace) -> int: # [3] three-judge halacha panel apply = not args.dry_run - print(f"[3/3] halacha_panel_approve {'--apply' if apply else '(dry-run)'} " + print(f"[3/4] halacha_panel_approve {'--apply' if apply else '(dry-run)'} " f"(Opus+DeepSeek+Gemini)…", flush=True) import halacha_panel_approve as hpa rc = await hpa.main(Namespace(limit=args.limit, concurrency=6, apply=apply)) diff --git a/web/app.py b/web/app.py index 
747adc2..5acda59 100644 --- a/web/app.py +++ b/web/app.py @@ -3387,6 +3387,33 @@ def _committee_chair_for_case(case: dict, case_number: str) -> str: return COMMITTEE_CHAIR_BY_PREFIX.get(case_number[:1], COMMITTEE_CHAIR_DEFAULT) +def _party_name(parties) -> str: + """First party's display name from a list of {name|party_name} dicts or strings.""" + if isinstance(parties, list) and parties: + p = parties[0] + if isinstance(p, dict): + return (p.get("name") or p.get("party_name") or "").strip() + return str(p).strip() + return "" + + +def _build_internal_citation( + case: dict, case_number: str, chair_name: str, proceeding_type: str, district: str, +) -> str: + """Deterministic uniform citation for OUR committee decisions — the Gemini metadata + extractor targets external rulings and returns nothing for internal ones, so we build + it from the case record instead. E.g.: + 'ועדת ערר מחוזית לתכנון ובניה ירושלים, בל"מ 8126-03-25 פלוני נ' הוועדה המקומית (יו"ר עו"ד דפנה תמיר)'.""" + appellant = _party_name(case.get("appellants")) + respondent = _party_name(case.get("respondents")) + parties = f" {appellant} נ' {respondent}" if (appellant and respondent) else ( + f" {appellant}" if appellant else "") + proc = proceeding_type or "ערר" + chair_clause = f' (יו"ר עו"ד {chair_name})' if chair_name else "" + return (f"ועדת ערר מחוזית לתכנון ובניה {district}, {proc} {case_number}" + f"{parties}{chair_clause}").strip() + + async def _enroll_final_in_library( case: dict, case_number: str, final_text: str, chair_name: str, ) -> dict: @@ -3404,12 +3431,23 @@ async def _enroll_final_in_library( if not final_text.strip(): out["error"] = "no final text extracted" return out + + # Deterministic metadata from the case record — the Gemini metadata extractor is + # tuned for EXTERNAL rulings and returns no_metadata for internal decisions, so we + # populate proceeding_type / date / tags / summary / citation ourselves (no LLM). 
+ district = "ירושלים" + proceeding_type = (case.get("proceeding_type") or "ערר").strip() + decision_date = case.get("decision_date") or case.get("hearing_date") + subject_tags = case.get("subject_categories") or [] + summary = (case.get("subject") or case.get("title") or "").strip() try: res = await int_svc.ingest_internal_decision( case_number=case_number, case_name=case.get("title", ""), - decision_date=case.get("decision_date"), chair_name=chair_name, - district="ירושלים", practice_area=case.get("practice_area", ""), - appeal_subtype=case.get("appeal_subtype", ""), text=final_text, + decision_date=decision_date, chair_name=chair_name, + district=district, practice_area=case.get("practice_area", ""), + appeal_subtype=case.get("appeal_subtype", ""), + proceeding_type=proceeding_type, subject_tags=subject_tags, + summary=summary, text=final_text, ) except Exception as e: logger.warning("library enrollment failed for %s: %s", case_number, e) @@ -3421,6 +3459,21 @@ async def _enroll_final_in_library( if not case_law_id: return out + # Build the uniform citation deterministically (Gemini won't, for internal rows). + try: + citation = _build_internal_citation( + case, case_number, chair_name, proceeding_type, district) + pool = await db.get_pool() + async with pool.acquire() as conn: + await conn.execute( + "UPDATE case_law SET citation_formatted = $2 " + "WHERE id = $1 AND COALESCE(citation_formatted, '') = ''", + UUID(case_law_id), citation, + ) + out["citation"] = citation + except Exception as e: + logger.warning("citation build failed for %s: %s", case_number, e) + # The precedents this decision cites → link to the library; flag the ones not found. try: await cit_tools.extract_internal_citations(case_law_id=case_law_id, limit=0) -- 2.49.1