fix(cases): מספור 5-ספרתי לבל"מ — סיווג, ולידציה, וחיפוש פסיקה-חסרה
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 6s
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 6s
נוהל-יו"ר (2026-06-11): מבנה מספר-תיק = <סידורי>-<חודש>-<שנה>, ואורך הסידורי
מקודד את סוג-ההליך — 4 ספרות = ערר, 5 ספרות = בל"מ. הספרה הראשונה ממשיכה
לקבוע תחום בשני האורכים (1→רישוי, 8→היטל, 9→פיצויים). הכלל חד-כיווני:
5-ספרתי הוא תמיד בל"מ; 4-ספרתי אינו מחייב ערר (בל"מ-מורשת מזוהה מהנושא).
הבאג שדיווח עליו היו"ר: חיפוש פסיקה-חסרה לפי מספר-תיק החזיר 404 על כל ערך
שאינו תיק קיים — שבר את הטבלה תוך כדי הקלדה ועל מספרי 5-ספרות.
תיקונים:
- web/app.py: GET /api/missing-precedents — מסנן case_number שלא תאם תיק מחזיר
רשימה ריקה (200), לא 404. סמנטיקה תקינה ל-collection-filter.
- missing-precedents/page.tsx: debounce (350ms) על שדות-הסינון — קוורי אחד
אחרי שמפסיקים להקליד, לא אחד לכל הקשה.
- practice_area.py: regex סידורי \d{4}→\d{4,5}; case_serial_digits() +
is_blam_by_number() (5⇒בל"מ); derive_subtype_with_blam ו-derive_proceeding_type
מזהים בל"מ גם מ-5-ספרות (בנוסף לנושא). callers: cases.py, internal_decisions.py.
- proofreader.py: דפוסי חילוץ-שם-קובץ \d{3,4}→\d{3,5}.
- web-ui: practice-area.ts (מראָה ל-backend), schemas/case.ts (regex
serial-month-year, 4-or-5 ספרות, superRefine 5⇒בל"מ), placeholder בוויזרד.
- תיעוד: docs/spec/X1-identifiers.md §1א + legal-ai/CLAUDE.md.
Invariants: מקיים G1 (נרמול-במקור — ספרה ראשונה כמקור-אמת יחיד לתחום),
G2 (מסלול-סיווג יחיד, אין כפילות), INV-DM/X1 (מפתח קנוני + proceeding_type).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -58,6 +58,7 @@ def _internal_validate(inputs: dict) -> None:
|
||||
def _internal_derive(inputs: dict) -> dict:
|
||||
district = (inputs.get("district") or "").strip() or _district_from_court(inputs.get("court") or "")
|
||||
proc = (inputs.get("proceeding_type") or "").strip() or derive_proceeding_type(
|
||||
case_number=inputs.get("case_number") or "",
|
||||
appeal_subtype=inputs.get("appeal_subtype") or "", subject=inputs.get("case_name") or "",
|
||||
)
|
||||
return {"district": district, "proceeding_type": proc}
|
||||
|
||||
@@ -176,8 +176,12 @@ _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE = {
|
||||
|
||||
# Match the case number (last numeric group) in formats like:
|
||||
# ARAR-25-8126, ARAR-24-01-8007-33, 8126/25, 1170, ערר 1024-25
|
||||
_CASE_NUM = re.compile(r"(?:ARAR[-\s]*\d{2}[-\s]*(?:\d{2}[-\s]*)?)(\d{4})", re.IGNORECASE)
|
||||
_PLAIN_NUM = re.compile(r"(\d{4})")
|
||||
# Serial is 4 OR 5 digits: 4 = ערר (appeal), 5 = בל"מ (extension-of-time) per
|
||||
# the post-reform numbering convention (Jerusalem adopted 5-digit בל"מ; Tel Aviv
|
||||
# long predates it — e.g. 81002-01-21). The leading digit still encodes the
|
||||
# domain (1→רישוי, 8→היטל, 9→פיצויים) in BOTH widths — see is_blam_by_number().
|
||||
_CASE_NUM = re.compile(r"(?:ARAR[-\s]*\d{2}[-\s]*(?:\d{2}[-\s]*)?)(\d{4,5})", re.IGNORECASE)
|
||||
_PLAIN_NUM = re.compile(r"(\d{4,5})")
|
||||
|
||||
|
||||
_DOMAIN_TO_SUBTYPE: dict[str, str] = {
|
||||
@@ -216,6 +220,29 @@ def derive_subtype(case_number: str, practice_area: str = DEFAULT_PRACTICE_AREA)
|
||||
return _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE.get(first_digit, "unknown")
|
||||
|
||||
|
||||
def case_serial_digits(case_number: str) -> int | None:
|
||||
"""Return the digit-count of the case serial, or None if unparseable.
|
||||
|
||||
The serial is the leading numeric group of the case number (the part
|
||||
before month/year): ``8126-03-25`` → 4, ``81002-01-21`` → 5.
|
||||
"""
|
||||
cn = case_number or ""
|
||||
m = _CASE_NUM.search(cn) or _PLAIN_NUM.search(cn)
|
||||
return len(m.group(1)) if m else None
|
||||
|
||||
|
||||
def is_blam_by_number(case_number: str) -> bool:
|
||||
"""True iff the case serial has 5 digits.
|
||||
|
||||
Post-reform numbering convention: a 4-digit serial is an ערר (appeal),
|
||||
a 5-digit serial is a בל"מ (בקשה להארכת מועד). This is the authoritative
|
||||
signal going forward; legacy 4-digit בל"מ cases are still detected from
|
||||
the subject via ``is_blam_subject``. The rule is **one-directional** — a
|
||||
5-digit serial implies בל"מ, but a 4-digit serial does NOT imply ערר.
|
||||
"""
|
||||
return case_serial_digits(case_number) == 5
|
||||
|
||||
|
||||
def derive_subtype_with_blam(
|
||||
case_number: str,
|
||||
subject: str = "",
|
||||
@@ -236,9 +263,11 @@ def derive_subtype_with_blam(
|
||||
'building_permit'
|
||||
"""
|
||||
base = derive_subtype(case_number, practice_area)
|
||||
if not is_blam_subject(subject):
|
||||
# בל"מ is signalled either by the subject text (legacy 4-digit cases) or by
|
||||
# a 5-digit serial (post-reform convention).
|
||||
if not (is_blam_subject(subject) or is_blam_by_number(case_number)):
|
||||
return base
|
||||
# subject says it's בל"מ — return the matching extension_request_* variant.
|
||||
# it's a בל"מ — return the matching extension_request_* variant.
|
||||
# For domain practice_area (axis B), use the direct mapping.
|
||||
if practice_area in DOMAIN_PRACTICE_AREAS:
|
||||
return _DOMAIN_TO_BLAM_SUBTYPE.get(practice_area, base)
|
||||
@@ -263,15 +292,21 @@ def is_blam_subtype(appeal_subtype: str) -> bool:
|
||||
return appeal_subtype in BLAM_SUBTYPES
|
||||
|
||||
|
||||
def derive_proceeding_type(*, appeal_subtype: str = "", subject: str = "") -> str:
|
||||
def derive_proceeding_type(
|
||||
*, case_number: str = "", appeal_subtype: str = "", subject: str = "",
|
||||
) -> str:
|
||||
"""Return 'בל"מ' / 'ערר' for appeals-committee decisions/cases.
|
||||
|
||||
Priority: explicit subtype prefix → subject regex → default 'ערר'.
|
||||
Priority: explicit subtype prefix → subject regex → 5-digit serial →
|
||||
default 'ערר'. The 5-digit signal is one-directional (a 4-digit serial
|
||||
does not force 'ערר' — a legacy 4-digit בל"מ is caught by the subject).
|
||||
"""
|
||||
if appeal_subtype and appeal_subtype.startswith("extension_request_"):
|
||||
return 'בל"מ'
|
||||
if subject and is_blam_subject(subject):
|
||||
return 'בל"מ'
|
||||
if case_number and is_blam_by_number(case_number):
|
||||
return 'בל"מ'
|
||||
return "ערר"
|
||||
|
||||
|
||||
|
||||
@@ -268,12 +268,13 @@ async def proofread(path: Path) -> tuple[str, dict]:
|
||||
|
||||
# ── Metadata extraction ──────────────────────────────────────────
|
||||
|
||||
# Serial is 3–5 digits: 4 = ערר, 5 = בל"מ (post-reform). 3 tolerates legacy short serials.
|
||||
FILENAME_NUMBER_PATTERNS = [
|
||||
re.compile(r"^ARAR-(\d{2})-(\d{3,4})"),
|
||||
re.compile(r"^ערר\s+(\d{3,4})-(\d{2})"),
|
||||
re.compile(r"^ערר\s+(\d{3,4})\s*-"),
|
||||
re.compile(r"^ARAR-(\d{2})-(\d{3,5})"),
|
||||
re.compile(r"^ערר\s+(\d{3,5})-(\d{2})"),
|
||||
re.compile(r"^ערר\s+(\d{3,5})\s*-"),
|
||||
]
|
||||
LEGACY_MULTI_PATTERN = re.compile(r"(\d{3,4})\+(\d{3,4})")
|
||||
LEGACY_MULTI_PATTERN = re.compile(r"(\d{3,5})\+(\d{3,5})")
|
||||
|
||||
|
||||
def decision_number_from_filename(stem: str) -> str | None:
|
||||
|
||||
@@ -183,9 +183,10 @@ async def case_create(
|
||||
appeal_subtype = derived_subtype
|
||||
pa.validate(practice_area, appeal_subtype)
|
||||
|
||||
# proceeding_type: explicit override > derived from subtype/subject > 'ערר'
|
||||
# proceeding_type: explicit override > derived from subtype/subject/number > 'ערר'
|
||||
# (a 5-digit serial signals בל"מ per the post-reform numbering convention).
|
||||
resolved_proc = proceeding_type.strip() or pa.derive_proceeding_type(
|
||||
appeal_subtype=appeal_subtype, subject=subject,
|
||||
case_number=case_number, appeal_subtype=appeal_subtype, subject=subject,
|
||||
)
|
||||
|
||||
case = await db.create_case(
|
||||
|
||||
Reference in New Issue
Block a user