Clean up legacy references: update CLAUDE.md, remove dead import script
- CLAUDE.md: clarify vault was deleted, knowledge is in docs/+training/
- Remove import-final-decisions.py (migration completed, all decisions in DB)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,202 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Import 6 final signed decisions: extract text, store in DB."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from datetime import date
|
|
||||||
from pathlib import Path
|
|
||||||
from uuid import UUID
|
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
|
||||||
|
|
||||||
import fitz # PyMuPDF
|
|
||||||
from docx import Document as DocxDocument
|
|
||||||
|
|
||||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════
|
|
||||||
# 6 Final Decisions
|
|
||||||
# ═══════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
# Each entry describes one final decision to import:
#   case_number   - looked up in cases.case_number to find the owning case
#   file_path     - PDF/DOCX location, relative to PROJECT_ROOT
#   title         - title stored for the document row (and the decision summary)
#   outcome       - outcome value written to the decision and the case ("" where not filled in)
#   decision_date - datetime.date of the decision, or None where not filled in
FINAL_DECISIONS = [
    dict(
        case_number="1180-1181",
        file_path="legacy/dafna-tamir/04_Archive/ערר 1180-1181 הכט/החלטה/הכט 1180-1181.pdf",
        title="החלטה סופית — הכט 1180-1181",
        outcome="rejected",
        decision_date=date(2026, 2, 5),
    ),
    dict(
        case_number="8255-25",
        file_path="legacy/dafna-tamir/04_Archive/בל\"מ 8255-25 אפרים אבי נ' הוועדה המקומית לתכנון ובניה/החלטה/אליהו הרנון - להפצה.docx",
        title="החלטה סופית — אפרים אבי 8255-25",
        outcome="rejected",
        decision_date=None,
    ),
    dict(
        case_number="8007-24",
        file_path="legacy/dafna-tamir/04_Archive/ערר 8007-24-עומר דרוויש-ערר על שומה מכרעת/החלטה/החלטה-סופית.docx",
        title="החלטה סופית — עומר דרוויש 8007-24",
        outcome="",
        decision_date=None,
    ),
    dict(
        case_number="1113/25",
        file_path="legacy/dafna-tamir/04_Archive/ערר-1113-25-אייל-מבורך/החלטה/החלטה-1113-25-טיוטה-סופית.docx",
        title="החלטה סופית — מבורך 1113-25",
        outcome="",
        decision_date=None,
    ),
    dict(
        case_number="1126/25+1141/25",
        file_path="legacy/dafna-tamir/04_Archive/ערר-1126-25-תמא-38-בית-הכרם/החלטה/בית הכרם-טיוטת החלטה-9.pdf",
        title="החלטה סופית — בית הכרם 1126/25",
        outcome="partial",
        decision_date=date(2026, 3, 1),
    ),
    dict(
        case_number="1128/25",
        file_path="legacy/dafna-tamir/04_Archive/ערר-1128-25-שטרית/החלטה/1128-25 החלטה להפצה.pdf",
        title="החלטה סופית — שטרית 1128-25",
        outcome="",
        decision_date=None,
    ),
]

# Directory two levels above this script; every file_path above is joined onto it.
PROJECT_ROOT = Path(__file__).parent.parent
|
|
||||||
|
|
||||||
|
|
||||||
def extract_pdf_text(file_path: Path) -> str:
    """Extract the text of every page of a PDF using PyMuPDF.

    Args:
        file_path: Location of the PDF file.

    Returns:
        The text of all pages, in page order, joined with newlines.
    """
    # Using the document as a context manager guarantees it is closed even
    # when a page fails to extract; the previous explicit close() was skipped
    # on any exception and leaked the open file.
    with fitz.open(str(file_path)) as doc:
        return "\n".join(page.get_text() for page in doc)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_docx_text(file_path: Path) -> str:
    """Return the text of a DOCX file's paragraphs joined with newlines.

    Paragraphs whose text is empty or whitespace-only are left out.
    """
    document = DocxDocument(str(file_path))
    kept = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        if content.strip():
            kept.append(content)
    return "\n".join(kept)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_text(file_path: Path) -> str:
    """Extract text from a file, choosing the extractor by its extension.

    The extension is compared case-insensitively. ``.pdf`` goes to
    ``extract_pdf_text`` and ``.docx`` to ``extract_docx_text``.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    extension = file_path.suffix.lower()

    # Reject unknown formats first so the happy paths stay flat.
    if extension not in (".pdf", ".docx"):
        raise ValueError(f"Unsupported format: {extension}")

    if extension == ".pdf":
        return extract_pdf_text(file_path)
    return extract_docx_text(file_path)
|
|
||||||
|
|
||||||
|
|
||||||
def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in ``text``."""
    tokens = text.split()
    return len(tokens)
|
|
||||||
|
|
||||||
|
|
||||||
def _pdf_page_count(file_path: Path):
    """Return the number of pages of a PDF, or None for any other file type.

    The extension check is case-insensitive, matching ``extract_text``, and
    the document is closed again as soon as its length has been read.
    """
    if file_path.suffix.lower() != ".pdf":
        return None
    with fitz.open(str(file_path)) as pdf:
        return len(pdf)


async def main():
    """Import every entry of FINAL_DECISIONS into the database.

    For each entry whose file exists and whose case number is found in
    ``cases``:

    * extract the file's text and count its words;
    * update the ``documents`` row with the same ``file_path`` if there is
      one, otherwise insert a new row of type 'decision';
    * update the ``decisions`` row of the case if there is one, otherwise
      insert version 1 with status 'final';
    * mark the case itself as 'final' with the entry's outcome.

    Entries with a missing file or an unknown case are reported and skipped.
    A summary of decision documents, final decisions and total words is
    printed at the end. The pool is closed even when an import step raises.
    """
    await init_schema()
    pool = await get_pool()

    try:
        for d in FINAL_DECISIONS:
            file_path = PROJECT_ROOT / d["file_path"]
            if not file_path.exists():
                print(f"❌ קובץ לא נמצא: {file_path}")
                continue

            # Extract text
            print(f"\nמחלץ טקסט: {d['title']}...")
            text = extract_text(file_path)
            word_count = count_words(text)
            print(f" {word_count} מילים, {len(text)} תווים")

            async with pool.acquire() as conn:
                # Get case_id
                case_id = await conn.fetchval(
                    "SELECT id FROM cases WHERE case_number = $1", d["case_number"]
                )
                if not case_id:
                    print(f" ⚠ תיק {d['case_number']} לא נמצא ב-DB — מדלג")
                    continue

                # Register document (documents are matched by their full path)
                existing_doc = await conn.fetchval(
                    "SELECT id FROM documents WHERE file_path = $1",
                    str(file_path),
                )
                if existing_doc:
                    doc_id = existing_doc
                    print(f" מסמך כבר קיים ב-DB: {doc_id}")
                    # Update text
                    await conn.execute(
                        """UPDATE documents SET extracted_text = $1, extraction_status = 'completed'
                        WHERE id = $2""",
                        text, doc_id,
                    )
                else:
                    # Page count is only known for PDFs; the helper closes
                    # the file it opens instead of leaking the handle.
                    page_count = _pdf_page_count(file_path)
                    doc_id = await conn.fetchval(
                        """INSERT INTO documents (case_id, doc_type, title, file_path, extracted_text, extraction_status, page_count)
                        VALUES ($1, 'decision', $2, $3, $4, 'completed', $5)
                        RETURNING id""",
                        case_id, d["title"], str(file_path), text,
                        page_count,
                    )
                    print(f" מסמך נרשם: {doc_id}")

                # Create/update decision record (matched by case)
                existing_decision = await conn.fetchval(
                    "SELECT id FROM decisions WHERE case_id = $1", case_id
                )
                if existing_decision:
                    await conn.execute(
                        """UPDATE decisions SET status = 'final', outcome = $1, total_words = $2,
                        decision_date = $3, updated_at = now() WHERE id = $4""",
                        d["outcome"], word_count, d["decision_date"], existing_decision,
                    )
                    decision_id = existing_decision
                    print(f" החלטה עודכנה: {decision_id}")
                else:
                    decision_id = await conn.fetchval(
                        """INSERT INTO decisions (case_id, version, status, outcome, outcome_summary,
                        total_words, decision_date, author)
                        VALUES ($1, 1, 'final', $2, $3, $4, $5, 'דפנה תמיר')
                        RETURNING id""",
                        case_id, d["outcome"], d["title"], word_count, d["decision_date"],
                    )
                    print(f" החלטה נוצרה: {decision_id}")

                # Update case status
                await conn.execute(
                    "UPDATE cases SET status = 'final', expected_outcome = $1, updated_at = now() WHERE id = $2",
                    d["outcome"], case_id,
                )

            print(f" ✅ הושלם: {d['case_number']}")

        # Summary
        async with pool.acquire() as conn:
            doc_count = await conn.fetchval(
                "SELECT count(*) FROM documents WHERE doc_type = 'decision' AND extraction_status = 'completed'"
            )
            dec_count = await conn.fetchval(
                "SELECT count(*) FROM decisions WHERE status = 'final'"
            )
            total_words = await conn.fetchval(
                "SELECT sum(total_words) FROM decisions WHERE status = 'final'"
            )
    finally:
        # Release the pool on failure as well as on success.
        await close_pool()

    # sum() over zero rows yields NULL (None), which cannot take the ',' format.
    total_words = total_words or 0

    print(f"\n{'='*50}")
    print(f"✅ סה\"כ מסמכי החלטה: {doc_count}")
    print(f"✅ סה\"כ החלטות סופיות: {dec_count}")
    print(f"✅ סה\"כ מילים: {total_words:,}")


if __name__ == "__main__":
    asyncio.run(main())
|
|
||||||
Reference in New Issue
Block a user