#!/usr/bin/env python3 """One-shot: extract appellant claims for case 8174-24. The analyst (CMPA-13) finished but `extract_claims` timed out three times on the main 25K-char appeal document, so we have only 19 committee/response claims in DB and zero appellant claims. This script reruns extraction with a higher timeout and parallel chunks. Targets: • כתב ערר 18.12.24 (appeal, 25,474 chars) — appellant claims • השלמת מסמכים תמ״א 38 (decision, 3,718 chars) — supplementary appeal filing After phase 1.1-1.3 lands, this script becomes obsolete. Usage: /home/chaim/legal-ai/mcp-server/.venv/bin/python scripts/extract_claims_8174.py """ from __future__ import annotations import asyncio import json import sys import time from pathlib import Path from uuid import UUID # Ensure we can import legal_mcp from this repo's mcp-server tree sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src")) from legal_mcp.services import claims_extractor, claude_session, db # ── Patch claude_session to use 30-min ceiling ─────────────────────── # The hard-coded timeout=120 in claims_extractor.extract_claims_with_ai is # what kept failing. Force every claude_session call here to use 1800s. 
# Minimum timeout, in seconds, applied to every claude_session call made while
# this script runs. Callers that already ask for more keep their own value.
_MIN_TIMEOUT_SECONDS = 1800

_orig_query_json = claude_session.query_json
_orig_query = claude_session.query


def _patched_query_json(prompt: str, timeout: int = 120):
    """Call the original ``claude_session.query_json`` with a raised timeout.

    Args:
        prompt: Prompt text, forwarded unchanged.
        timeout: Requested timeout in seconds; raised to at least
            ``_MIN_TIMEOUT_SECONDS`` before forwarding.

    Returns:
        Whatever the original ``query_json`` returns (passed through as-is).
    """
    return _orig_query_json(prompt, timeout=max(timeout, _MIN_TIMEOUT_SECONDS))


def _patched_query(prompt: str, timeout: int = 120, max_turns: int = 1):
    """Call the original ``claude_session.query`` with a raised timeout.

    Args:
        prompt: Prompt text, forwarded unchanged.
        timeout: Requested timeout in seconds; raised to at least
            ``_MIN_TIMEOUT_SECONDS`` before forwarding.
        max_turns: Forwarded unchanged.

    Returns:
        Whatever the original ``query`` returns (passed through as-is).
    """
    return _orig_query(
        prompt,
        timeout=max(timeout, _MIN_TIMEOUT_SECONDS),
        max_turns=max_turns,
    )


# Install the wrappers on the module object. NOTE(review): this only affects
# code that looks the functions up as attributes of ``claude_session`` at call
# time — confirm claims_extractor does not bind them via ``from ... import``.
claude_session.query_json = _patched_query_json
claude_session.query = _patched_query

CASE_NUMBER = "8174-24"

TARGETS = [
    # (doc_id, title hint, doc_type override, party_hint)
    ("655f96f7-d406-44ac-bb53-6b2c1ab2909c", "כתב ערר 18.12.24", "appeal", "יואל גולדמן"),
    ("13b4795a-4fb7-460e-bddf-a5d282a1a67f", "השלמת מסמכים תמ״א 38", "appeal", "יואל גולדמן"),
]


async def main() -> int:
    """Re-run claim extraction for every entry in ``TARGETS`` and print a tally.

    For each target the stored document text is loaded and handed to
    ``claims_extractor.extract_and_store_claims``. A target with no text is
    skipped; a target whose extraction raises is reported and the loop moves
    on, so one bad document does not block the others. Afterwards the claims
    table is summarised for the case.

    Returns:
        Process exit code: 0 when no extraction raised, 1 when the case was
        not found or at least one extraction raised.
    """
    case = await db.get_case_by_number(CASE_NUMBER)
    if not case:
        print(f"ERROR: case {CASE_NUMBER} not found")
        return 1

    # str() first so this works whether the DB layer hands back the id as a
    # string or as an already-constructed UUID (UUID(UUID(...)) raises).
    case_id = UUID(str(case["id"]))
    print(f"=== Case {CASE_NUMBER} — {case['title']} ===")
    print()

    failures = 0
    for doc_id, label, doc_type, party_hint in TARGETS:
        document_id = UUID(doc_id)
        text = await db.get_document_text(document_id)
        if not text:
            print(f"SKIP {label} — no extracted_text")
            continue

        chars = len(text)
        print(f"--- {label} ({chars:,} chars, doc_type={doc_type}) ---")
        t0 = time.monotonic()
        try:
            result = await claims_extractor.extract_and_store_claims(
                case_id=case_id,
                document_id=document_id,
                text=text,
                doc_type=doc_type,
                party_hint=party_hint,
            )
        except Exception as e:
            # Deliberately broad: this is the script's top-level boundary and
            # the remaining targets should still be attempted. The type name
            # is included because some exceptions (e.g. timeouts) stringify
            # to an empty message.
            failures += 1
            print(f" FAILED: {type(e).__name__}: {e}")
            continue

        dt = time.monotonic() - t0
        print(f" done in {dt:.1f}s — {json.dumps(result, ensure_ascii=False)}")
        print()

    # Final tally
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT party_role, claim_type, source_document, count(*) as n "
            "FROM claims WHERE case_id = $1 "
            "GROUP BY 1, 2, 3 ORDER BY 1, 3",
            case_id,
        )

    print("=== Final claims breakdown ===")
    total = 0
    for r in rows:
        n = r["n"]
        total += n
        # ``!s`` converts before padding so a NULL column (None) is printed
        # instead of raising TypeError from None.__format__.
        print(f" {r['party_role']!s:12} {r['claim_type']!s:10} ({n:3}) ← {r['source_document']}")
    print(f" TOTAL: {total} claims")

    # Surface extraction failures to the caller via the exit status.
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))