feat(ingest): write-time canonical case_number normalization (GAP-06, FU-2a)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-30 20:42:47 +00:00
parent bcd226ac1a
commit 2b91173f25

View File

@@ -1155,7 +1155,7 @@ async def create_case(
hearing_date, notes, expected_outcome,
practice_area, appeal_subtype, proceeding_type)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)""",
case_id, case_number, title,
case_id, _canonical_case_number(case_number), title,
json.dumps(appellants or []),
json.dumps(respondents or []),
subject, property_address, permit_number, committee_type,
@@ -1211,6 +1211,21 @@ def _normalize_case_number(s: str) -> str:
return s.strip().replace("/", "-")
def _canonical_case_number(s: str) -> str:
"""Canonical write-time form per X1 §1: trim · prefix-strip · '/''-'.
Deterministic and format-only — does NOT add or remove a month segment.
Used at the write boundary for identifier-keyed corpora (internal
committee decisions, active cases). NOT for external precedents, whose
canonical identifier is the full citation.
"""
s = (s or "").strip()
m = re.search(r"\d", s)
if m:
s = s[m.start():]
return s.strip().replace("/", "-")
async def get_case_by_number(case_number: str) -> dict | None:
pool = await get_pool()
norm = _normalize_case_number(case_number)
@@ -2647,6 +2662,7 @@ async def create_internal_committee_decision(
filed against an existing appeal with the same number).
"""
pool = await get_pool()
case_number = _canonical_case_number(case_number)
tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
async with pool.acquire() as conn:
existing = await conn.fetchrow(