From 2b91173f25908aee8aab5f3252999889e0a4fda3 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 30 May 2026 20:42:47 +0000 Subject: [PATCH] feat(ingest): write-time canonical case_number normalization (GAP-06, FU-2a) Co-Authored-By: Claude Sonnet 4.6 --- mcp-server/src/legal_mcp/services/db.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index ac9ac70..b8d8a2c 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -1155,7 +1155,7 @@ async def create_case( hearing_date, notes, expected_outcome, practice_area, appeal_subtype, proceeding_type) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)""", - case_id, case_number, title, + case_id, _canonical_case_number(case_number), title, json.dumps(appellants or []), json.dumps(respondents or []), subject, property_address, permit_number, committee_type, @@ -1211,6 +1211,21 @@ def _normalize_case_number(s: str) -> str: return s.strip().replace("/", "-") +def _canonical_case_number(s: str) -> str: + """Canonical write-time form per X1 §1: trim · prefix-strip · '/'→'-'. + + Deterministic and format-only — does NOT add or remove a month segment. + Used at the write boundary for identifier-keyed corpora (internal + committee decisions, active cases). NOT for external precedents, whose + canonical identifier is the full citation. + """ + s = (s or "").strip() + m = re.search(r"\d", s) + if m: + s = s[m.start():] + return s.strip().replace("/", "-") + + async def get_case_by_number(case_number: str) -> dict | None: pool = await get_pool() norm = _normalize_case_number(case_number) @@ -2647,6 +2662,7 @@ async def create_internal_committee_decision( filed against an existing appeal with the same number). """ pool = await get_pool() + case_number = _canonical_case_number(case_number) tags_json = json.dumps(subject_tags or [], ensure_ascii=False) async with pool.acquire() as conn: existing = await conn.fetchrow(