"""Practice area + appeal subtype: derivation, validation, constants.

Two orthogonal axes used to separate legal domains across the system:

    practice_area   — top-level domain. **Two taxonomies coexist** (see below).
    appeal_subtype  — refines within a domain.

⚠️ TWO TAXONOMIES — DO NOT CONFUSE
==================================

A. **Multi-tenant axis** (legacy, used in routing logic):

   - ``appeals_committee``  — the legal-ai instance for Daphna's committee
   - ``national_insurance`` — future / hypothetical other tenants
   - ``labor_law``          — future

   When this axis is used, ``appeal_subtype`` carries the actual domain:
   ``building_permit`` (1xxx), ``betterment_levy`` (8xxx),
   ``compensation_197`` (9xxx).

B. **Domain axis** (DB columns ``case_law.practice_area``,
   ``cases.practice_area`` — what tests, validators, and CHECK constraints
   actually use):

   - ``rishuy_uvniya``    — licensing and construction (1xxx)
   - ``betterment_levy``  — betterment levy (8xxx)
   - ``compensation_197`` — compensation under section 197 (9xxx)

Use ``to_db_practice_area(multi_tenant_pa, appeal_subtype)`` to convert from
axis A to axis B before writing to the DB.

Background: TaskMaster #30 (sub-bug ב) — many ``case_law`` rows stored
``appeals_committee`` (axis A) where they should have stored a domain value
(axis B). The migration backfill plus CHECK constraints close the gap, and
this module now validates **both** namespaces.
"""

from __future__ import annotations

import re

# ── Enums ──────────────────────────────────────────────────────────

# Multi-tenant axis (legacy)
MULTI_TENANT_PRACTICE_AREAS: set[str] = {
    "appeals_committee",
    "national_insurance",
    "labor_law",
}

# Domain axis (matches DB constraints on case_law/cases)
DOMAIN_PRACTICE_AREAS: set[str] = {
    "rishuy_uvniya",
    "betterment_levy",
    "compensation_197",
}

# Union — what ``validate()`` accepts for backward-compat.
# Empty string is permitted because the DB CHECK constraint allows it as
# a "not yet classified" sentinel (e.g. when auto-derivation fails on an
# unrecognized case_number format).
PRACTICE_AREAS: set[str] = MULTI_TENANT_PRACTICE_AREAS | DOMAIN_PRACTICE_AREAS | {""}

APPEALS_COMMITTEE_SUBTYPES: set[str] = {
    "building_permit",
    "betterment_levy",
    "compensation_197",
    # BLAM — a request to extend the deadline for filing an appeal.
    # Separate tracks per domain:
    "extension_request_building_permit",  # 1xxx — section 152, 30 days
    "extension_request_betterment_levy",  # 8xxx — section 14 of the Third Schedule, 45 days
    "extension_request_compensation",  # 9xxx — section 198(d), 30 days
    "unknown",
}

# BLAM subtypes — easy to recognise by their shared prefix.
BLAM_SUBTYPES: set[str] = {
    "extension_request_building_permit",
    "extension_request_betterment_levy",
    "extension_request_compensation",
}

# Mapping: domain → BLAM subtype
_DOMAIN_TO_BLAM_SUBTYPE: dict[str, str] = {
    "rishuy_uvniya": "extension_request_building_permit",
    "betterment_levy": "extension_request_betterment_levy",
    "compensation_197": "extension_request_compensation",
}

# Mapping: first digit → BLAM subtype
# (same shape as _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE)
_APPEALS_COMMITTEE_DIGIT_TO_BLAM: dict[str, str] = {
    "1": "extension_request_building_permit",
    "8": "extension_request_betterment_levy",
    "9": "extension_request_compensation",
}

DEFAULT_PRACTICE_AREA = "appeals_committee"

# Subtypes per practice_area (extend when adding domains)
SUBTYPES_BY_AREA: dict[str, set[str]] = {
    "appeals_committee": APPEALS_COMMITTEE_SUBTYPES,
    "national_insurance": {"unknown"},
    "labor_law": {"unknown"},
    # Domain values — subtype is implicit in the value itself
    "rishuy_uvniya": {"building_permit", "extension_request_building_permit", "unknown"},
    "betterment_levy": {"betterment_levy", "extension_request_betterment_levy", "unknown"},
    "compensation_197": {"compensation_197", "extension_request_compensation", "unknown"},
    # Empty (unclassified) — allow any of the appeals_committee subtypes
    "": APPEALS_COMMITTEE_SUBTYPES,
}

# Mapping: (multi_tenant_pa, appeal_subtype) → domain_pa
_SUBTYPE_TO_DOMAIN: dict[str, str] = {
    "building_permit": "rishuy_uvniya",
    "betterment_levy": "betterment_levy",
    "compensation_197": "compensation_197",
    "extension_request_building_permit": "rishuy_uvniya",
    "extension_request_betterment_levy": "betterment_levy",
    "extension_request_compensation": "compensation_197",
}

# Quote characters that may sit inside the Hebrew BLAM abbreviation:
# ASCII double quote, Hebrew gershayim (U+05F4), and the curly/low double
# quotes (U+201C, U+201D, U+201E) that word processors substitute.
# None of these is special inside a regex character class.
_BLAM_QUOTE_VARIANTS = '"\u05f4\u201c\u201d\u201e'

# Regexes that recognise an extension-of-time request in the appeal subject:
# the common spelled-out phrasings plus the abbreviation with any of the
# quote variants above. Compiled case-insensitively.
_BLAM_SUBJECT_PATTERNS = (
    re.compile(r"בקשה\s+להארכת\s+מועד", re.IGNORECASE),
    re.compile(f"בל[{_BLAM_QUOTE_VARIANTS}]מ", re.IGNORECASE),
    re.compile(r"הארכת\s+מועד\s+להגשת", re.IGNORECASE),
)


def is_blam_subject(subject: str) -> bool:
    r"""Return True iff ``subject`` indicates a BLAM (extension-of-time request).

    A subject matches when any pattern in ``_BLAM_SUBJECT_PATTERNS`` is found
    anywhere in it: the spelled-out request phrasings, or the abbreviation
    written with a straight, Hebrew, or curly double quote. An empty or
    ``None`` subject is never a match.

    The docstring is raw so the ``\"`` escapes in the examples survive as
    valid doctest source.

    Examples:
        >>> is_blam_subject("בל\"מ אלחנן ברלינגר נ' לינדאב")
        True
        >>> is_blam_subject("בל”מ אלחנן ברלינגר נ' לינדאב")
        True
        >>> is_blam_subject("בקשה להארכת מועד להגשת ערר")
        True
        >>> is_blam_subject("היתר בנייה ברחוב X")
        False
    """
    if not subject:
        return False
    return any(pattern.search(subject) for pattern in _BLAM_SUBJECT_PATTERNS)


def to_db_practice_area(practice_area: str, appeal_subtype: str = "") -> str:
    """Convert a multi-tenant practice_area + appeal_subtype to the domain
    value stored in DB columns (case_law/cases).

    Both inputs are whitespace-stripped (``None`` is treated as empty).
    A value that is already a domain practice area is returned as-is; for
    ``appeals_committee`` the domain is looked up from ``appeal_subtype``.

    Returns ``""`` when the input cannot be mapped — callers should handle
    this rather than letting ``""`` propagate silently to the DB.

    Examples:
        >>> to_db_practice_area("appeals_committee", "building_permit")
        'rishuy_uvniya'
        >>> to_db_practice_area("rishuy_uvniya")
        'rishuy_uvniya'
        >>> to_db_practice_area("appeals_committee")
        ''
    """
    pa = (practice_area or "").strip()
    if pa in DOMAIN_PRACTICE_AREAS:
        return pa
    if pa == "appeals_committee":
        return _SUBTYPE_TO_DOMAIN.get((appeal_subtype or "").strip(), "")
    return ""


# ── Derivation ─────────────────────────────────────────────────────

_APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE: dict[str, str] = {
    "1": "building_permit",
    "8": "betterment_levy",
    "9": "compensation_197",
}

# ARAR-style identifiers: capture the 4-digit case number that follows the
# 2-digit group (and an optional second 2-digit group) after "ARAR",
# e.g. ARAR-25-8126 → 8126, ARAR-24-01-8007-33 → 8007.
_CASE_NUM = re.compile(r"(?:ARAR[-\s]*\d{2}[-\s]*(?:\d{2}[-\s]*)?)(\d{4})", re.IGNORECASE)
# Fallback for plain formats (8126/25, 1170, "ערר 1024-25"): the first run of
# four digits found anywhere in the string.
_PLAIN_NUM = re.compile(r"(\d{4})")

_DOMAIN_TO_SUBTYPE: dict[str, str] = {
    "rishuy_uvniya": "building_permit",
    "betterment_levy": "betterment_levy",
    "compensation_197": "compensation_197",
}


def _leading_case_digit(case_number: str) -> str:
    """Return the first digit of the 4-digit case number, or ``""`` if none.

    Tries the ARAR format first, then falls back to the first 4-digit run in
    the string. ``None`` / empty input yields ``""``.
    """
    text = case_number or ""
    match = _CASE_NUM.search(text) or _PLAIN_NUM.search(text)
    return match.group(1)[0] if match else ""


def derive_subtype(case_number: str, practice_area: str = DEFAULT_PRACTICE_AREA) -> str:
    """Infer the appeal_subtype from case_number.

    For appeals_committee (axis A), the convention is:
      1xxx → building_permit, 8xxx → betterment_levy, 9xxx → compensation_197.

    For domain values (axis B — rishuy_uvniya/betterment_levy/compensation_197),
    the subtype is implicit in the practice_area itself — we map directly
    without parsing the case number.

    Any other practice_area, an unparseable case number, or an unmapped
    leading digit yields ``"unknown"``.

    Handles multiple formats: ARAR-25-8126, 8126/25, 1170, "ערר 1024-25".
    """
    # Axis B: practice_area is already a domain value — map directly.
    if practice_area in DOMAIN_PRACTICE_AREAS:
        return _DOMAIN_TO_SUBTYPE.get(practice_area, "unknown")
    if practice_area != "appeals_committee":
        return "unknown"
    # "" (no case number found) is not a key, so it falls through to "unknown".
    return _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE.get(
        _leading_case_digit(case_number), "unknown"
    )


def derive_subtype_with_blam(
    case_number: str,
    subject: str = "",
    practice_area: str = DEFAULT_PRACTICE_AREA,
) -> str:
    r"""Like ``derive_subtype()`` but also detects a BLAM from the subject.

    If ``subject`` indicates an extension-of-time request (see
    ``is_blam_subject``), the returned subtype is one of the
    ``extension_request_*`` values (chosen per case_number / practice_area).
    When no matching extension variant can be determined, or the subject is
    not a BLAM, the result is exactly what ``derive_subtype()`` returns.

    The docstring is raw so the ``\"`` escape in the examples survives as
    valid doctest source.

    Examples:
        >>> derive_subtype_with_blam("1017-03-26", "בל\"מ ברלינגר נ' לינדאב")
        'extension_request_building_permit'
        >>> derive_subtype_with_blam("8500-25", "בקשה להארכת מועד")
        'extension_request_betterment_levy'
        >>> derive_subtype_with_blam("1033-25", "ערר על החלטת ועדה")
        'building_permit'
    """
    base = derive_subtype(case_number, practice_area)
    if not is_blam_subject(subject):
        return base

    # Subject says it is a BLAM — return the matching extension_request_* variant.
    # For a domain practice_area (axis B), use the direct mapping.
    if practice_area in DOMAIN_PRACTICE_AREAS:
        return _DOMAIN_TO_BLAM_SUBTYPE.get(practice_area, base)

    # For appeals_committee (axis A), derive from the case-number digit.
    if practice_area == "appeals_committee":
        return _APPEALS_COMMITTEE_DIGIT_TO_BLAM.get(
            _leading_case_digit(case_number), base
        )

    return base


def is_blam_subtype(appeal_subtype: str) -> bool:
    """Return True iff appeal_subtype is one of the extension_request_* variants.

    Useful for UI badges and routing logic that need to detect BLAM cases
    regardless of which domain they belong to.
    """
    return appeal_subtype in BLAM_SUBTYPES


def derive_proceeding_type(*, appeal_subtype: str = "", subject: str = "") -> str:
    """Return the Hebrew proceeding label for appeals-committee decisions/cases.

    The result is the BLAM abbreviation for extension-of-time requests and the
    word for a regular appeal otherwise.

    Priority: explicit subtype prefix → subject regex → default (regular appeal).
    """
    if appeal_subtype and appeal_subtype.startswith("extension_request_"):
        return 'בל"מ'
    if subject and is_blam_subject(subject):
        return 'בל"מ'
    return "ערר"


def derive_domain_practice_area(case_number: str) -> str:
    """Map a case_number prefix to a domain practice_area (axis B).

    Returns:
        ``"rishuy_uvniya"`` for 1xxx, ``"betterment_levy"`` for 8xxx,
        ``"compensation_197"`` for 9xxx, or ``""`` when the prefix is
        unrecognized (caller decides the fallback).

    Examples:
        >>> derive_domain_practice_area("8126/25")
        'betterment_levy'
        >>> derive_domain_practice_area("1170")
        'rishuy_uvniya'
        >>> derive_domain_practice_area("ARAR-24-01-9007")
        'compensation_197'
        >>> derive_domain_practice_area("foo")
        ''
    """
    subtype = _APPEALS_COMMITTEE_DIGIT_TO_SUBTYPE.get(_leading_case_digit(case_number))
    if not subtype:
        return ""
    return _SUBTYPE_TO_DOMAIN.get(subtype, "")


# ── Validation ─────────────────────────────────────────────────────


def validate(practice_area: str, appeal_subtype: str | None) -> None:
    """Raise ValueError on unknown values. appeal_subtype=None is allowed.

    ``practice_area`` must be a member of ``PRACTICE_AREAS`` (either axis, or
    the empty "unclassified" sentinel). When ``appeal_subtype`` is given it
    must be one of the subtypes registered for that practice_area in
    ``SUBTYPES_BY_AREA``.

    Raises:
        ValueError: if either value is not recognised.
    """
    if practice_area not in PRACTICE_AREAS:
        raise ValueError(
            f"unknown practice_area: {practice_area!r}. "
            f"expected one of {sorted(PRACTICE_AREAS)}"
        )
    if appeal_subtype is None:
        return
    allowed = SUBTYPES_BY_AREA.get(practice_area, {"unknown"})
    if appeal_subtype not in allowed:
        raise ValueError(
            f"unknown appeal_subtype {appeal_subtype!r} for practice_area "
            f"{practice_area!r}. expected one of {sorted(allowed)}"
        )


def is_override(case_number: str, practice_area: str, appeal_subtype: str) -> bool:
    """Return True iff the user-supplied subtype disagrees with what
    derive_subtype would have produced (and the derived value is not 'unknown').

    Note: BLAM variants (extension_request_*) are NOT considered overrides of
    their parent domain — extension_request_building_permit on a 1xxx case is
    consistent with the case-number convention.
    """
    derived = derive_subtype(case_number, practice_area)
    if derived == "unknown":
        # Nothing could be derived, so there is nothing to override.
        return False
    if derived == appeal_subtype:
        return False
    # BLAM variants of the same domain are not overrides, e.g.
    # extension_request_building_permit ↔ building_permit (1xxx).
    if appeal_subtype in BLAM_SUBTYPES:
        if _SUBTYPE_TO_DOMAIN.get(appeal_subtype) == _SUBTYPE_TO_DOMAIN.get(derived):
            return False
    return True