#!/usr/bin/env python3
"""Import 6 final signed decisions: extract text, store in DB."""

import asyncio
import json
import sys
from datetime import date
from pathlib import Path
from uuid import UUID

# Make the MCP server package importable when this script is run directly;
# this must happen before the ``legal_mcp`` import below.
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))

import fitz  # PyMuPDF
from docx import Document as DocxDocument

from legal_mcp.services.db import get_pool, init_schema, close_pool

# ═══════════════════════════════════════════════════════════════════
# 6 Final Decisions
# ═══════════════════════════════════════════════════════════════════

# One entry per decision to import. ``file_path`` is joined onto PROJECT_ROOT
# by the importer; ``outcome`` may be an empty string and ``decision_date``
# may be None for some entries.
FINAL_DECISIONS = [
    {
        "case_number": "1180-1181",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר 1180-1181 הכט/החלטה/הכט 1180-1181.pdf",
        "title": "החלטה סופית — הכט 1180-1181",
        "outcome": "rejected",
        "decision_date": date(2026, 2, 5),
    },
    {
        "case_number": "8255-25",
        "file_path": "legacy/dafna-tamir/04_Archive/בל\"מ 8255-25 אפרים אבי נ' הוועדה המקומית לתכנון ובניה/החלטה/אליהו הרנון - להפצה.docx",
        "title": "החלטה סופית — אפרים אבי 8255-25",
        "outcome": "rejected",
        "decision_date": None,
    },
    {
        "case_number": "8007-24",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר 8007-24-עומר דרוויש-ערר על שומה מכרעת/החלטה/החלטה-סופית.docx",
        "title": "החלטה סופית — עומר דרוויש 8007-24",
        "outcome": "",
        "decision_date": None,
    },
    {
        "case_number": "1113/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1113-25-אייל-מבורך/החלטה/החלטה-1113-25-טיוטה-סופית.docx",
        "title": "החלטה סופית — מבורך 1113-25",
        "outcome": "",
        "decision_date": None,
    },
    {
        "case_number": "1126/25+1141/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1126-25-תמא-38-בית-הכרם/החלטה/בית הכרם-טיוטת החלטה-9.pdf",
        "title": "החלטה סופית — בית הכרם 1126/25",
        "outcome": "partial",
        "decision_date": date(2026, 3, 1),
    },
    {
        "case_number": "1128/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1128-25-שטרית/החלטה/1128-25 החלטה להפצה.pdf",
        "title": "החלטה סופית — שטרית 1128-25",
        "outcome": "",
        "decision_date": None,
    },
]

# Directory one level above this script's directory; every ``file_path``
# in FINAL_DECISIONS is resolved relative to it.
PROJECT_ROOT = Path(__file__).parent.parent
def extract_pdf_text(file_path: Path) -> str:
    """Extract text from a PDF using PyMuPDF.

    Args:
        file_path: Path to the PDF file.

    Returns:
        The text of every page, joined with newlines.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(str(file_path)) as doc:
        return "\n".join(page.get_text() for page in doc)


def extract_docx_text(file_path: Path) -> str:
    """Extract text from a DOCX file.

    Args:
        file_path: Path to the DOCX file.

    Returns:
        The text of all non-blank paragraphs, joined with newlines.
    """
    doc = DocxDocument(str(file_path))
    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())


def extract_text(file_path: Path) -> str:
    """Extract text from a file, dispatching on its (case-insensitive) extension.

    Args:
        file_path: Path to a ``.pdf`` or ``.docx`` file.

    Returns:
        The extracted text.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    suffix = file_path.suffix.lower()
    if suffix == ".pdf":
        return extract_pdf_text(file_path)
    if suffix == ".docx":
        return extract_docx_text(file_path)
    raise ValueError(f"Unsupported format: {suffix}")


def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*."""
    return len(text.split())


def _pdf_page_count(file_path: Path):
    """Return the page count of a PDF, or None for any other file type."""
    # Lower-case the suffix so the check agrees with extract_text().
    if file_path.suffix.lower() != ".pdf":
        return None
    with fitz.open(str(file_path)) as doc:
        return len(doc)


async def _upsert_document(conn, case_id, d, file_path: Path, text: str):
    """Insert or refresh the ``documents`` row for *file_path*; return its id."""
    existing_doc = await conn.fetchval(
        "SELECT id FROM documents WHERE file_path = $1",
        str(file_path),
    )
    if existing_doc:
        print(f" מסמך כבר קיים ב-DB: {existing_doc}")
        # Update text
        await conn.execute(
            """UPDATE documents
               SET extracted_text = $1, extraction_status = 'completed'
               WHERE id = $2""",
            text,
            existing_doc,
        )
        return existing_doc

    doc_id = await conn.fetchval(
        """INSERT INTO documents
               (case_id, doc_type, title, file_path,
                extracted_text, extraction_status, page_count)
           VALUES ($1, 'decision', $2, $3, $4, 'completed', $5)
           RETURNING id""",
        case_id,
        d["title"],
        str(file_path),
        text,
        _pdf_page_count(file_path),
    )
    print(f" מסמך נרשם: {doc_id}")
    return doc_id


async def _upsert_decision(conn, case_id, d, word_count: int):
    """Insert or update the ``decisions`` row for *case_id*; return its id."""
    existing_decision = await conn.fetchval(
        "SELECT id FROM decisions WHERE case_id = $1", case_id
    )
    if existing_decision:
        await conn.execute(
            """UPDATE decisions
               SET status = 'final', outcome = $1, total_words = $2,
                   decision_date = $3, updated_at = now()
               WHERE id = $4""",
            d["outcome"],
            word_count,
            d["decision_date"],
            existing_decision,
        )
        print(f" החלטה עודכנה: {existing_decision}")
        return existing_decision

    decision_id = await conn.fetchval(
        """INSERT INTO decisions
               (case_id, version, status, outcome, outcome_summary,
                total_words, decision_date, author)
           VALUES ($1, 1, 'final', $2, $3, $4, $5, 'דפנה תמיר')
           RETURNING id""",
        case_id,
        d["outcome"],
        d["title"],
        word_count,
        d["decision_date"],
    )
    print(f" החלטה נוצרה: {decision_id}")
    return decision_id


async def _import_decision(pool, d) -> None:
    """Extract one decision's text and record it against its case.

    Skips the entry (with a printed message) when the file is missing or
    when no ``cases`` row matches its case number.
    """
    file_path = PROJECT_ROOT / d["file_path"]
    if not file_path.exists():
        print(f"❌ קובץ לא נמצא: {file_path}")
        return

    # Extract text
    print(f"\nמחלץ טקסט: {d['title']}...")
    text = extract_text(file_path)
    word_count = count_words(text)
    print(f" {word_count} מילים, {len(text)} תווים")

    # NOTE(review): the statements below are not wrapped in a transaction,
    # so a mid-way failure can leave a document without its decision row.
    async with pool.acquire() as conn:
        # Get case_id
        case_id = await conn.fetchval(
            "SELECT id FROM cases WHERE case_number = $1", d["case_number"]
        )
        if not case_id:
            print(f" ⚠ תיק {d['case_number']} לא נמצא ב-DB — מדלג")
            return

        # Register document
        await _upsert_document(conn, case_id, d, file_path, text)

        # Create/update decision record
        await _upsert_decision(conn, case_id, d, word_count)

        # Update case status
        await conn.execute(
            "UPDATE cases SET status = 'final', expected_outcome = $1, updated_at = now() WHERE id = $2",
            d["outcome"],
            case_id,
        )
        print(f" ✅ הושלם: {d['case_number']}")


async def main():
    """Import every entry of FINAL_DECISIONS and print summary counts.

    Initialises the schema, processes each decision in order, then queries
    totals for completed decision documents and final decisions. The pool
    is closed even if an import step raises.
    """
    await init_schema()
    pool = await get_pool()
    try:
        for d in FINAL_DECISIONS:
            await _import_decision(pool, d)

        # Summary
        async with pool.acquire() as conn:
            doc_count = await conn.fetchval(
                "SELECT count(*) FROM documents WHERE doc_type = 'decision' AND extraction_status = 'completed'"
            )
            dec_count = await conn.fetchval(
                "SELECT count(*) FROM decisions WHERE status = 'final'"
            )
            total_words = await conn.fetchval(
                "SELECT sum(total_words) FROM decisions WHERE status = 'final'"
            )
    finally:
        await close_pool()

    # sum() yields NULL (None) when no rows match; fall back to 0 so the
    # thousands-separator format below does not raise TypeError.
    total_words = total_words or 0

    print(f"\n{'='*50}")
    print(f"✅ סה\"כ מסמכי החלטה: {doc_count}")
    print(f"✅ סה\"כ החלטות סופיות: {dec_count}")
    print(f"✅ סה\"כ מילים: {total_words:,}")


if __name__ == "__main__":
    asyncio.run(main())