Add CMPA (betterment levy) training support and update methodology
Support ingestion of betterment levy (היטל השבחה) decisions into a separate training corpus (CMPA).

Key changes:
- Add .doc file extraction via LibreOffice conversion in extractor
- Add practice_area/appeal_subtype columns to style_corpus table
- Route training files to cmp/ or cmpa/ subdirs based on appeal subtype
- Fix derive_subtype to handle ARAR-YY-NNNN format (was matching year digit)
- Expose practice_area/appeal_subtype params in MCP upload_training tool
- Add appeal_subtype filter to analyze_style for per-type style analysis
- Update betterment levy methodology in lessons.py: checklist (from generic to corpus-based), opening/closing strategies, and discussion rules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -43,14 +43,17 @@ SUBTYPES_BY_AREA: dict[str, set[str]] = {
|
||||
|
||||
# ── Derivation ─────────────────────────────────────────────────────
|
||||
|
||||
_FIRST_DIGIT = re.compile(r"^\s*(\d)")
|
||||
|
||||
_APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE = {
|
||||
"1": "building_permit",
|
||||
"8": "betterment_levy",
|
||||
"9": "compensation_197",
|
||||
}
|
||||
|
||||
# Match the case number (last numeric group) in formats like:
|
||||
# ARAR-25-8126, ARAR-24-01-8007-33, 8126/25, 1170, ערר 1024-25
|
||||
_CASE_NUM = re.compile(r"(?:ARAR[-\s]*\d{2}[-\s]*(?:\d{2}[-\s]*)?)(\d{4})", re.IGNORECASE)
|
||||
_PLAIN_NUM = re.compile(r"(\d{4})")
|
||||
|
||||
|
||||
def derive_subtype(case_number: str, practice_area: str = DEFAULT_PRACTICE_AREA) -> str:
    """Infer the appeal_subtype from case_number.

    For appeals_committee, the convention is:
      1xxx → building_permit, 8xxx → betterment_levy, 9xxx → compensation_197.

    For other practice areas there is no public numbering convention yet,
    so we return 'unknown' until a real rule is defined.

    Handles multiple formats: ARAR-25-8126, 8126/25, 1170, ערר 1024-25.

    Args:
        case_number: Raw case number string; ``None`` or empty is treated
            as "no number" and yields 'unknown'.
        practice_area: Practice area the case belongs to. Only
            'appeals_committee' has a derivation rule.

    Returns:
        The subtype mapped from the first digit of the 4-digit case number,
        or 'unknown' when no 4-digit number is found or the digit is unmapped.
    """
    if practice_area != "appeals_committee":
        return "unknown"

    cn = case_number or ""

    # Try the ARAR format first so the year prefix (e.g. the "25" in
    # ARAR-25-8126) is skipped; otherwise fall back to the first 4-digit
    # number anywhere in the string.
    m = _CASE_NUM.search(cn) or _PLAIN_NUM.search(cn)
    if not m:
        return "unknown"

    # The mapping is keyed by a single digit, so look up only the leading
    # digit of the captured case number, not the whole 4-digit group.
    first_digit = m.group(1)[0]
    return _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE.get(first_digit, "unknown")
|
||||
|
||||
|
||||
# ── Validation ─────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user