From 891f20dbb92d9f47adf22cd392bcb64b3dff0055 Mon Sep 17 00:00:00 2001 From: Chaim Date: Tue, 14 Apr 2026 16:16:35 +0000 Subject: [PATCH] Clean up legacy references: update CLAUDE.md, remove dead import script - CLAUDE.md: clarify vault was deleted, knowledge is in docs/+training/ - Remove import-final-decisions.py (migration completed, all decisions in DB) Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/import-final-decisions.py | 202 ------------------------------ 1 file changed, 202 deletions(-) delete mode 100644 scripts/import-final-decisions.py diff --git a/scripts/import-final-decisions.py b/scripts/import-final-decisions.py deleted file mode 100644 index b0d9ac0..0000000 --- a/scripts/import-final-decisions.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python3 -"""Import 6 final signed decisions: extract text, store in DB.""" - -import asyncio -import json -import sys -from datetime import date -from pathlib import Path -from uuid import UUID - -sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src")) - -import fitz # PyMuPDF -from docx import Document as DocxDocument - -from legal_mcp.services.db import get_pool, init_schema, close_pool - - -# ═══════════════════════════════════════════════════════════════════ -# 6 Final Decisions -# ═══════════════════════════════════════════════════════════════════ - -FINAL_DECISIONS = [ - { - "case_number": "1180-1181", - "file_path": "legacy/dafna-tamir/04_Archive/ערר 1180-1181 הכט/החלטה/הכט 1180-1181.pdf", - "title": "החלטה סופית — הכט 1180-1181", - "outcome": "rejected", - "decision_date": date(2026, 2, 5), - }, - { - "case_number": "8255-25", - "file_path": "legacy/dafna-tamir/04_Archive/בל\"מ 8255-25 אפרים אבי נ' הוועדה המקומית לתכנון ובניה/החלטה/אליהו הרנון - להפצה.docx", - "title": "החלטה סופית — אפרים אבי 8255-25", - "outcome": "rejected", - "decision_date": None, - }, - { - "case_number": "8007-24", - "file_path": "legacy/dafna-tamir/04_Archive/ערר 8007-24-עומר דרוויש-ערר על שומה מכרעת/החלטה/החלטה-סופית.docx", - "title": "החלטה סופית — עומר דרוויש 8007-24", - "outcome": "", - "decision_date": None, - }, - { - "case_number": "1113/25", - "file_path": "legacy/dafna-tamir/04_Archive/ערר-1113-25-אייל-מבורך/החלטה/החלטה-1113-25-טיוטה-סופית.docx", - "title": "החלטה סופית — מבורך 1113-25", - "outcome": "", - "decision_date": None, - }, - { - "case_number": "1126/25+1141/25", - "file_path": "legacy/dafna-tamir/04_Archive/ערר-1126-25-תמא-38-בית-הכרם/החלטה/בית הכרם-טיוטת החלטה-9.pdf", - "title": "החלטה סופית — בית הכרם 1126/25", - "outcome": "partial", - "decision_date": date(2026, 3, 1), - }, - { - "case_number": "1128/25", - "file_path": "legacy/dafna-tamir/04_Archive/ערר-1128-25-שטרית/החלטה/1128-25 החלטה להפצה.pdf", - "title": "החלטה סופית — שטרית 1128-25", - "outcome": "", - "decision_date": None, - }, -] - -PROJECT_ROOT = Path(__file__).parent.parent - - -def extract_pdf_text(file_path: Path) -> str: - """Extract text from PDF using PyMuPDF.""" - doc = fitz.open(str(file_path)) - text_parts = [] - for page in doc: - text_parts.append(page.get_text()) - doc.close() - return "\n".join(text_parts) - - -def extract_docx_text(file_path: Path) -> str: - """Extract text from DOCX.""" - doc = DocxDocument(str(file_path)) - return "\n".join(p.text for p in doc.paragraphs if p.text.strip()) - - -def extract_text(file_path: Path) -> str: - """Extract text based on file extension.""" - suffix = file_path.suffix.lower() - if suffix == ".pdf": - return extract_pdf_text(file_path) - elif suffix == ".docx": - return extract_docx_text(file_path) - else: - raise ValueError(f"Unsupported format: {suffix}") - - -def count_words(text: str) -> int: - return len(text.split()) - - -async def main(): - await init_schema() - pool = await get_pool() - - for d in FINAL_DECISIONS: - file_path = PROJECT_ROOT / d["file_path"] - if not file_path.exists(): - print(f"❌ קובץ לא נמצא: {file_path}") - continue - - # Extract text - print(f"\nמחלץ טקסט: {d['title']}...") - text = extract_text(file_path) - word_count = count_words(text) - print(f" {word_count} מילים, {len(text)} תווים") - - async with pool.acquire() as conn: - # Get case_id - case_id = await conn.fetchval( - "SELECT id FROM cases WHERE case_number = $1", d["case_number"] - ) - if not case_id: - print(f" ⚠ תיק {d['case_number']} לא נמצא ב-DB — מדלג") - continue - - # Register document - existing_doc = await conn.fetchval( - "SELECT id FROM documents WHERE file_path = $1", - str(file_path), - ) - if existing_doc: - doc_id = existing_doc - print(f" מסמך כבר קיים ב-DB: {doc_id}") - # Update text - await conn.execute( - """UPDATE documents SET extracted_text = $1, extraction_status = 'completed' - WHERE id = $2""", - text, doc_id, - ) - else: - doc_id = await conn.fetchval( - """INSERT INTO documents (case_id, doc_type, title, file_path, extracted_text, extraction_status, page_count) - VALUES ($1, 'decision', $2, $3, $4, 'completed', $5) - RETURNING id""", - case_id, d["title"], str(file_path), text, - len(fitz.open(str(file_path))) if file_path.suffix == ".pdf" else None, - ) - print(f" מסמך נרשם: {doc_id}") - - # Create/update decision record - existing_decision = await conn.fetchval( - "SELECT id FROM decisions WHERE case_id = $1", case_id - ) - if existing_decision: - await conn.execute( - """UPDATE decisions SET status = 'final', outcome = $1, total_words = $2, - decision_date = $3, updated_at = now() WHERE id = $4""", - d["outcome"], word_count, d["decision_date"], existing_decision, - ) - decision_id = existing_decision - print(f" החלטה עודכנה: {decision_id}") - else: - decision_id = await conn.fetchval( - """INSERT INTO decisions (case_id, version, status, outcome, outcome_summary, - total_words, decision_date, author) - VALUES ($1, 1, 'final', $2, $3, $4, $5, 'דפנה תמיר') - RETURNING id""", - case_id, d["outcome"], d["title"], word_count, d["decision_date"], - ) - print(f" החלטה נוצרה: {decision_id}") - - # Update case status - await conn.execute( - "UPDATE cases SET status = 'final', expected_outcome = $1, updated_at = now() WHERE id = $2", - d["outcome"], case_id, - ) - - print(f" ✅ הושלם: {d['case_number']}") - - # Summary - async with pool.acquire() as conn: - doc_count = await conn.fetchval( - "SELECT count(*) FROM documents WHERE doc_type = 'decision' AND extraction_status = 'completed'" - ) - dec_count = await conn.fetchval( - "SELECT count(*) FROM decisions WHERE status = 'final'" - ) - total_words = await conn.fetchval( - "SELECT sum(total_words) FROM decisions WHERE status = 'final'" - ) - - await close_pool() - - print(f"\n{'='*50}") - print(f"✅ סה\"כ מסמכי החלטה: {doc_count}") - print(f"✅ סה\"כ החלטות סופיות: {dec_count}") - print(f"✅ סה\"כ מילים: {total_words:,}") - - -if __name__ == "__main__": - asyncio.run(main())