fix(cases): מספור 5-ספרתי לבל"מ — סיווג, ולידציה, וחיפוש פסיקה-חסרה
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 6s

נוהל-יו"ר (2026-06-11): מבנה מספר-תיק = <סידורי>-<חודש>-<שנה>, ואורך הסידורי
מקודד את סוג-ההליך — 4 ספרות = ערר, 5 ספרות = בל"מ. הספרה הראשונה ממשיכה
לקבוע תחום בשני האורכים (1→רישוי, 8→היטל, 9→פיצויים). הכלל חד-כיווני:
5-ספרתי הוא תמיד בל"מ; 4-ספרתי אינו מחייב ערר (בל"מ-מורשת מזוהה מהנושא).

הבאג שדיווח עליו היו"ר: חיפוש פסיקה-חסרה לפי מספר-תיק החזיר 404 על כל ערך
שאינו תיק קיים — שבר את הטבלה תוך כדי הקלדה ועל מספרי 5-ספרות.

תיקונים:
- web/app.py: GET /api/missing-precedents — מסנן case_number שלא תאם תיק מחזיר
  רשימה ריקה (200), לא 404. סמנטיקה תקינה ל-collection-filter.
- missing-precedents/page.tsx: debounce (350ms) על שדות-הסינון — קוורי אחד
  אחרי שמפסיקים להקליד, לא אחד לכל הקשה.
- practice_area.py: regex סידורי \d{4}→\d{4,5}; case_serial_digits() +
  is_blam_by_number() (5⇒בל"מ); derive_subtype_with_blam ו-derive_proceeding_type
  מזהים בל"מ גם מ-5-ספרות (בנוסף לנושא). callers: cases.py, internal_decisions.py.
- proofreader.py: דפוסי חילוץ-שם-קובץ \d{3,4}→\d{3,5}.
- web-ui: practice-area.ts (מראָה ל-backend), schemas/case.ts (regex
  serial-month-year, 4-or-5 ספרות, superRefine 5⇒בל"מ), placeholder בוויזרד.
- תיעוד: docs/spec/X1-identifiers.md §1א + legal-ai/CLAUDE.md.

Invariants: מקיים G1 (נרמול-במקור — ספרה ראשונה כמקור-אמת יחיד לתחום),
G2 (מסלול-סיווג יחיד, אין כפילות), INV-DM/X1 (מפתח קנוני + proceeding_type).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-12 06:16:42 +00:00
parent 9cd290e08e
commit e8bcb9c1ea
11 changed files with 157 additions and 26 deletions

View File

@@ -58,6 +58,7 @@ def _internal_validate(inputs: dict) -> None:
def _internal_derive(inputs: dict) -> dict:
    """Fill in the derived ``district`` and ``proceeding_type`` fields.

    An explicit, non-blank value in ``inputs`` always wins. Otherwise the
    district is obtained from ``_district_from_court`` applied to the
    ``court`` value, and the proceeding type from ``derive_proceeding_type``
    fed with the case number, appeal subtype and case name.

    Args:
        inputs: Mapping of raw field values; missing or falsy entries are
            treated as empty strings.

    Returns:
        A dict with exactly the keys ``"district"`` and ``"proceeding_type"``.
    """
    def _text(key: str) -> str:
        # Normalise missing / None / otherwise-falsy entries to "".
        return inputs.get(key) or ""

    district = _text("district").strip()
    if not district:
        district = _district_from_court(_text("court"))

    proceeding_type = _text("proceeding_type").strip()
    if not proceeding_type:
        proceeding_type = derive_proceeding_type(
            case_number=_text("case_number"),
            appeal_subtype=_text("appeal_subtype"),
            subject=_text("case_name"),
        )

    return {"district": district, "proceeding_type": proceeding_type}

View File

@@ -176,8 +176,12 @@ _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE = {
# Match the case number (last numeric group) in formats like:
# ARAR-25-8126, ARAR-24-01-8007-33, 8126/25, 1170, ערר 1024-25
_CASE_NUM = re.compile(r"(?:ARAR[-\s]*\d{2}[-\s]*(?:\d{2}[-\s]*)?)(\d{4})", re.IGNORECASE)
_PLAIN_NUM = re.compile(r"(\d{4})")
# Serial is 4 OR 5 digits: 4 = ערר (appeal), 5 = בל"מ (extension-of-time) per
# the post-reform numbering convention (Jerusalem adopted 5-digit בל"מ; Tel Aviv
# long predates it — e.g. 81002-01-21). The leading digit still encodes the
# domain (1→רישוי, 8→היטל, 9→פיצויים) in BOTH widths — see is_blam_by_number().
_CASE_NUM = re.compile(r"(?:ARAR[-\s]*\d{2}[-\s]*(?:\d{2}[-\s]*)?)(\d{4,5})", re.IGNORECASE)
_PLAIN_NUM = re.compile(r"(\d{4,5})")
_DOMAIN_TO_SUBTYPE: dict[str, str] = {
@@ -216,6 +220,29 @@ def derive_subtype(case_number: str, practice_area: str = DEFAULT_PRACTICE_AREA)
return _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE.get(first_digit, "unknown")
def case_serial_digits(case_number: str) -> int | None:
    """Return how many digits the case serial has, or None if none is found.

    The serial is whatever group 1 of ``_CASE_NUM`` captures; when that
    pattern does not match, ``_PLAIN_NUM`` is tried as a fallback. A falsy
    ``case_number`` is treated as the empty string.

    Examples: ``8126-03-25`` → 4, ``81002-01-21`` → 5.
    """
    text = case_number or ""
    # Order matters: the prefixed form takes precedence over the bare number.
    for pattern in (_CASE_NUM, _PLAIN_NUM):
        found = pattern.search(text)
        if found:
            return len(found.group(1))
    return None
def is_blam_by_number(case_number: str) -> bool:
    """Tell whether the case serial is exactly five digits long.

    Numbering convention: a 5-digit serial marks a בל"מ (request for an
    extension of time), while a 4-digit serial is normally an ערר (appeal).
    The rule is deliberately one-directional: five digits imply בל"מ, but
    four digits do NOT imply ערר, because legacy 4-digit בל"מ cases exist
    and are recognised from the subject text (``is_blam_subject``) instead.

    Returns False when no serial can be parsed from ``case_number``.
    """
    serial_width = case_serial_digits(case_number)
    return serial_width == 5
def derive_subtype_with_blam(
case_number: str,
subject: str = "",
@@ -236,9 +263,11 @@ def derive_subtype_with_blam(
'building_permit'
"""
base = derive_subtype(case_number, practice_area)
if not is_blam_subject(subject):
# בל"מ is signalled either by the subject text (legacy 4-digit cases) or by
# a 5-digit serial (post-reform convention).
if not (is_blam_subject(subject) or is_blam_by_number(case_number)):
return base
# subject says it's בל"מ — return the matching extension_request_* variant.
# it's a בל"מ — return the matching extension_request_* variant.
# For domain practice_area (axis B), use the direct mapping.
if practice_area in DOMAIN_PRACTICE_AREAS:
return _DOMAIN_TO_BLAM_SUBTYPE.get(practice_area, base)
@@ -263,15 +292,21 @@ def is_blam_subtype(appeal_subtype: str) -> bool:
return appeal_subtype in BLAM_SUBTYPES
def derive_proceeding_type(
    *, case_number: str = "", appeal_subtype: str = "", subject: str = "",
) -> str:
    """Classify an appeals-committee decision/case as 'בל"מ' or 'ערר'.

    The signals are checked in this order and the first hit wins:

    1. ``appeal_subtype`` starts with ``extension_request_``;
    2. ``subject`` is recognised by ``is_blam_subject``;
    3. ``case_number`` has a 5-digit serial (``is_blam_by_number``).

    If none applies the result is 'ערר'. The serial check only ever pushes
    towards 'בל"מ': a 4-digit serial never forces 'ערר', so a legacy 4-digit
    case can still be classified through its subject.
    """
    # Empty arguments are skipped before the corresponding check is called.
    looks_like_blam = (
        (appeal_subtype and appeal_subtype.startswith("extension_request_"))
        or (subject and is_blam_subject(subject))
        or (case_number and is_blam_by_number(case_number))
    )
    return 'בל"מ' if looks_like_blam else "ערר"

View File

@@ -268,12 +268,13 @@ async def proofread(path: Path) -> tuple[str, dict]:
# ── Metadata extraction ──────────────────────────────────────────
# Serial is 3–5 digits: 4 = ערר, 5 = בל"מ (post-reform). 3 tolerates legacy short serials.
FILENAME_NUMBER_PATTERNS = [
re.compile(r"^ARAR-(\d{2})-(\d{3,4})"),
re.compile(r"^ערר\s+(\d{3,4})-(\d{2})"),
re.compile(r"^ערר\s+(\d{3,4})\s*-"),
re.compile(r"^ARAR-(\d{2})-(\d{3,5})"),
re.compile(r"^ערר\s+(\d{3,5})-(\d{2})"),
re.compile(r"^ערר\s+(\d{3,5})\s*-"),
]
LEGACY_MULTI_PATTERN = re.compile(r"(\d{3,4})\+(\d{3,4})")
LEGACY_MULTI_PATTERN = re.compile(r"(\d{3,5})\+(\d{3,5})")
def decision_number_from_filename(stem: str) -> str | None: