#!/usr/bin/env python3 """One-off data fix for TaskMaster #137 — committee precedent whose identifier field was polluted by a full citation. Background: ``missing_precedent_upload`` (committee branch) fell back to the raw ``citation`` when the form left ``case_number`` blank, so a citation like ``ערר (ת"א 85074-04-25) רפאל לוי ואח' נ' הוועדה … - חולון`` landed as the identifier / display name (INV-ID2 violation). The code fix (PR for #137) prevents recurrence; this script corrects the one already-stored row. Scope: a SINGLE internal_committee row (1bf0bae0). Verified the only internal_committee row where ``_canonical_case_number(case_number) != case_number`` (the legacy 138 "polluted" matches are external/cited_only PREFIXED numbers — the X1 §5 external-identifier item, deliberately out of scope here). The row has ``document_id = NULL`` (no file/storage key to rename) and 0 incoming citations, so only three columns change: ``case_number``, ``case_name``, ``citation_formatted``. Idempotent: if the row already carries the canonical number it is a no-op. Dry-run by default; pass ``--apply`` to write. Run (local, reads ~/.env for POSTGRES_URL): HOME=/home/chaim PYTHONPATH=mcp-server/src python scripts/fix_137_committee_case_number.py [--apply] """ from __future__ import annotations import asyncio import sys import asyncpg from legal_mcp import config # The verified target row + its corrected values (see module docstring). 
CASE_LAW_ID = "1bf0bae0-1cb7-4110-ba1b-b956e42b0355"
# Value the row is expected to hold before the fix; any other value aborts.
BAD_CASE_NUMBER = "85074/0425"
# Replacement values written when --apply is given.
NEW_CASE_NUMBER = "85074-04-25"
NEW_CASE_NAME = "רפאל לוי ואח' נ' הוועדה המקומית לתכנון ובניה - חולון"


async def main(apply: bool) -> int:
    """Correct the identifier columns of the single target ``case_law`` row.

    Reads the row ``CASE_LAW_ID``, verifies it still holds
    ``BAD_CASE_NUMBER``, checks that the corrected number does not collide
    with another internal_committee row, prints the planned change and, when
    *apply* is true, writes ``case_number``, ``case_name`` and
    ``citation_formatted``.

    Args:
        apply: ``False`` prints the planned change only (dry-run); ``True``
            also executes the UPDATE.

    Returns:
        Exit code. ``0`` when the row is missing, already canonical, previewed
        in dry-run, or updated. ``1`` when the current ``case_number`` is
        unexpected, the corrected number collides with another row, or the
        UPDATE did not report exactly one row.
    """
    conn = await asyncpg.connect(config.POSTGRES_URL)
    try:
        row = await conn.fetchrow(
            "SELECT case_number, case_name, citation_formatted, proceeding_type "
            "FROM case_law WHERE id = $1",
            CASE_LAW_ID,
        )
        if row is None:
            print(f"row {CASE_LAW_ID} not found — nothing to do")
            return 0

        # Idempotence: a row that already carries the canonical number is done.
        if row["case_number"] == NEW_CASE_NUMBER:
            print(f"already canonical (case_number={NEW_CASE_NUMBER!r}) — no-op")
            return 0

        # Only the exact known-bad value is rewritten; never guess.
        if row["case_number"] != BAD_CASE_NUMBER:
            print(
                f"UNEXPECTED current case_number={row['case_number']!r} "
                f"(expected {BAD_CASE_NUMBER!r}) — refusing to guess; inspect manually"
            )
            return 1

        # Collision guard: the (case_number, proceeding_type) partial-unique key.
        clash = await conn.fetchval(
            "SELECT id FROM case_law WHERE source_kind='internal_committee' "
            "AND case_number = $1 AND proceeding_type = $2 AND id <> $3",
            NEW_CASE_NUMBER, row["proceeding_type"], CASE_LAW_ID,
        )
        if clash:
            print(f"COLLISION: {NEW_CASE_NUMBER!r}/{row['proceeding_type']!r} "
                  f"already exists as {clash} — aborting")
            return 1

        # Keep a NULL citation NULL: coercing it to "" and writing that back
        # would silently change the column's value.
        old_citation = row["citation_formatted"]
        new_citation = (
            None
            if old_citation is None
            else old_citation.replace(BAD_CASE_NUMBER, NEW_CASE_NUMBER)
        )

        print("WILL UPDATE:")
        print(f"  case_number: {row['case_number']!r} -> {NEW_CASE_NUMBER!r}")
        print(f"  case_name:   {row['case_name']!r}\n            -> {NEW_CASE_NAME!r}")
        print(f"  citation_formatted: {old_citation!r}\n"
              f"            -> {new_citation!r}")

        if not apply:
            print("\n(dry-run — pass --apply to write)")
            return 0

        # Compare-and-set: the write only lands if the row still holds the
        # value verified above, so a concurrent edit is never overwritten.
        status = await conn.execute(
            "UPDATE case_law SET case_number = $2, case_name = $3, "
            "citation_formatted = $4 WHERE id = $1 AND case_number = $5",
            CASE_LAW_ID, NEW_CASE_NUMBER, NEW_CASE_NAME, new_citation,
            BAD_CASE_NUMBER,
        )
        # execute() returns the command status string, e.g. "UPDATE 1".
        if status != "UPDATE 1":
            print(
                f"UNEXPECTED update status {status!r} (expected 'UPDATE 1') "
                f"— row may have changed since it was read; inspect manually"
            )
            return 1

        print("\n✓ updated")
        return 0
    finally:
        await conn.close()


if __name__ == "__main__":
    sys.exit(asyncio.run(main("--apply" in sys.argv)))