"""Backfill case.status after the 17 → 10 status-menu trim. Why this exists: the manual status menu was trimmed from 17 to 10 core statuses (decorative mid-stage markers that no pipeline code ever set were removed). Existing rows that currently hold a removed status would otherwise be "stuck" on a value no longer in the dropdown / SSoT, rendering via the Hebrew legacy fallback. This maps each removed status to the nearest *preceding* kept status in the lifecycle order, so a case keeps the closest truthful position. Mapping (removed → kept): uploading → processing in_progress → outcome_set analyst_verified → documents_ready research_complete → documents_ready brainstorming → outcome_set analysis_enriched → direction_approved ready_for_writing → direction_approved drafting → direction_approved qa_failed → qa_review Idempotent: a second run is a no-op (no rows match the removed statuses). Dry-run by default — prints the affected counts; pass --apply to write. Usage (runs inside the legal-ai container — shared Postgres on :5433): docker cp scripts/backfill_case_status_trim.py :/tmp/ docker exec python /tmp/backfill_case_status_trim.py # dry-run docker exec python /tmp/backfill_case_status_trim.py --apply # write """ from __future__ import annotations import argparse import asyncio import logging import sys from pathlib import Path def _setup_paths(): here = Path(__file__).resolve().parent mcp_src = here.parent / "mcp-server" / "src" if mcp_src.is_dir() and str(mcp_src) not in sys.path: sys.path.insert(0, str(mcp_src)) _setup_paths() from legal_mcp.services import db # noqa: E402 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") log = logging.getLogger("status-trim") # removed status → nearest preceding kept status STATUS_MAP = { "uploading": "processing", "in_progress": "outcome_set", "analyst_verified": "documents_ready", "research_complete": "documents_ready", "brainstorming": "outcome_set", "analysis_enriched": "direction_approved", "ready_for_writing": "direction_approved", "drafting": "direction_approved", "qa_failed": "qa_review", } async def backfill(apply: bool) -> int: pool = await db.get_pool() # Show the full current distribution for context. dist = await pool.fetch("SELECT status, count(*) AS n FROM cases GROUP BY status ORDER BY n DESC") log.info("Current status distribution:") for r in dist: log.info(" %-22s %d", r["status"], r["n"]) affected = {r["status"]: r["n"] for r in dist if r["status"] in STATUS_MAP} total = sum(affected.values()) if not total: log.info("Nothing to migrate — no rows hold a removed status. ✓") return 0 log.info("Rows to migrate (%d total):", total) for old, n in affected.items(): log.info(" %-22s → %-20s (%d)", old, STATUS_MAP[old], n) if not apply: log.info("DRY-RUN — no changes written. Re-run with --apply to migrate.") return total migrated = 0 for old, new in STATUS_MAP.items(): if old not in affected: continue res = await pool.execute( "UPDATE cases SET status = $1, updated_at = now() WHERE status = $2", new, old, ) # res like "UPDATE 3" n = int(res.split()[-1]) if res and res.split()[-1].isdigit() else 0 migrated += n log.info(" migrated %-22s → %-20s (%d)", old, new, n) log.info("Done — migrated %d rows.", migrated) return migrated def main() -> int: parser = argparse.ArgumentParser(description="Backfill case.status after the 17→10 status trim") parser.add_argument("--apply", action="store_true", help="Write changes (default: dry-run)") args = parser.parse_args() return 0 if asyncio.run(backfill(args.apply)) >= 0 else 1 if __name__ == "__main__": raise SystemExit(main())