Includes: - docs/: architecture, block-schema, migration-plan, product-specification - scripts/: bidi_table, decompose-decisions, extract-claims, seed-knowledge, etc. - skill-legal-decision/: SKILL.md + references + block-schema - skill-legal-assistant/: SKILL.md - skill-legal-docx/: SKILL.md + references - .claude/commands/: bidi-table skill - .taskmaster/: task config + PRDs - .gitignore: exclude legacy/, kiryat-yearim/, node_modules/, memory/ Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
203 lines
7.7 KiB
Python
203 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Import 6 final signed decisions: extract text, store in DB."""
|
|
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
from datetime import date
|
|
from pathlib import Path
|
|
from uuid import UUID
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
|
|
|
import fitz # PyMuPDF
|
|
from docx import Document as DocxDocument
|
|
|
|
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# 6 Final Decisions
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
# Keys of every record in FINAL_DECISIONS, in the order the dicts are built.
_DECISION_FIELDS = ("case_number", "file_path", "title", "outcome", "decision_date")

# One row per decision file to import. Columns follow _DECISION_FIELDS:
#   case_number   - value matched against cases.case_number in main()
#   file_path     - path relative to PROJECT_ROOT (joined in main())
#   title         - document title; also stored as the decision's outcome_summary
#   outcome       - "" in the rows that do not give one
#   decision_date - None in the rows that do not give one
_DECISION_ROWS = (
    (
        "1180-1181",
        "legacy/dafna-tamir/04_Archive/ערר 1180-1181 הכט/החלטה/הכט 1180-1181.pdf",
        "החלטה סופית — הכט 1180-1181",
        "rejected",
        date(2026, 2, 5),
    ),
    (
        "8255-25",
        "legacy/dafna-tamir/04_Archive/בל\"מ 8255-25 אפרים אבי נ' הוועדה המקומית לתכנון ובניה/החלטה/אליהו הרנון - להפצה.docx",
        "החלטה סופית — אפרים אבי 8255-25",
        "rejected",
        None,
    ),
    (
        "8007-24",
        "legacy/dafna-tamir/04_Archive/ערר 8007-24-עומר דרוויש-ערר על שומה מכרעת/החלטה/החלטה-סופית.docx",
        "החלטה סופית — עומר דרוויש 8007-24",
        "",
        None,
    ),
    (
        "1113/25",
        "legacy/dafna-tamir/04_Archive/ערר-1113-25-אייל-מבורך/החלטה/החלטה-1113-25-טיוטה-סופית.docx",
        "החלטה סופית — מבורך 1113-25",
        "",
        None,
    ),
    (
        "1126/25+1141/25",
        "legacy/dafna-tamir/04_Archive/ערר-1126-25-תמא-38-בית-הכרם/החלטה/בית הכרם-טיוטת החלטה-9.pdf",
        "החלטה סופית — בית הכרם 1126/25",
        "partial",
        date(2026, 3, 1),
    ),
    (
        "1128/25",
        "legacy/dafna-tamir/04_Archive/ערר-1128-25-שטרית/החלטה/1128-25 החלטה להפצה.pdf",
        "החלטה סופית — שטרית 1128-25",
        "",
        None,
    ),
)

# List of plain dicts, one per row above, consumed by main().
FINAL_DECISIONS = [dict(zip(_DECISION_FIELDS, row)) for row in _DECISION_ROWS]

# Directory two levels above this script; the relative file paths above hang off it.
PROJECT_ROOT = Path(__file__).parent.parent
|
|
|
|
|
|
def extract_pdf_text(file_path: Path) -> str:
    """Extract the text of every page of a PDF using PyMuPDF.

    Args:
        file_path: Location of the PDF file to read.

    Returns:
        The text of each page, in page order, joined with newlines.
    """
    # The context manager closes the document even when a page fails to
    # extract, so the file handle is never left open on error.
    with fitz.open(str(file_path)) as doc:
        return "\n".join(page.get_text() for page in doc)
|
|
|
|
|
|
def extract_docx_text(file_path: Path) -> str:
    """Return the non-blank paragraphs of a DOCX file, one per line.

    Only ``document.paragraphs`` is read; paragraphs whose text is empty or
    whitespace-only are dropped, the rest are kept verbatim.
    """
    document = DocxDocument(str(file_path))
    kept = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        if content.strip():
            kept.append(content)
    return "\n".join(kept)
|
|
|
|
|
|
def extract_text(file_path: Path) -> str:
    """Dispatch to the PDF or DOCX extractor according to the file suffix.

    The suffix is compared case-insensitively.

    Raises:
        ValueError: If the suffix is neither ``.pdf`` nor ``.docx``.
    """
    ext = file_path.suffix.lower()
    # Reject unknown formats up front; the remaining code handles the two
    # supported ones.
    if ext not in (".pdf", ".docx"):
        raise ValueError(f"Unsupported format: {ext}")
    if ext == ".pdf":
        return extract_pdf_text(file_path)
    return extract_docx_text(file_path)
|
|
|
|
|
|
def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*."""
    tokens = text.split()
    return len(tokens)
|
|
|
|
|
|
def _pdf_page_count(file_path):
    """Return the number of pages of a PDF file, or None for any other suffix."""
    # Compare case-insensitively, the same way extract_text() picks the
    # extractor, so "X.PDF" also gets a page count.
    if file_path.suffix.lower() != ".pdf":
        return None
    # Context manager releases the file handle once the pages are counted.
    with fitz.open(str(file_path)) as pdf:
        return len(pdf)


async def _store_document(conn, case_id, d, file_path, text):
    """Insert the `documents` row for *file_path*, or refresh its text if it exists.

    Returns the id of the row that was updated or created.
    """
    doc_id = await conn.fetchval(
        "SELECT id FROM documents WHERE file_path = $1",
        str(file_path),
    )
    if doc_id:
        print(f" מסמך כבר קיים ב-DB: {doc_id}")
        # Existing row: only the extracted text and status are refreshed.
        await conn.execute(
            """UPDATE documents SET extracted_text = $1, extraction_status = 'completed'
               WHERE id = $2""",
            text, doc_id,
        )
        return doc_id

    doc_id = await conn.fetchval(
        """INSERT INTO documents (case_id, doc_type, title, file_path, extracted_text, extraction_status, page_count)
           VALUES ($1, 'decision', $2, $3, $4, 'completed', $5)
           RETURNING id""",
        case_id, d["title"], str(file_path), text,
        _pdf_page_count(file_path),
    )
    print(f" מסמך נרשם: {doc_id}")
    return doc_id


async def _store_decision(conn, case_id, d, word_count):
    """Mark the case's `decisions` row as final, creating it when missing.

    Returns the id of the row that was updated or created.
    """
    decision_id = await conn.fetchval(
        "SELECT id FROM decisions WHERE case_id = $1", case_id
    )
    if decision_id:
        await conn.execute(
            """UPDATE decisions SET status = 'final', outcome = $1, total_words = $2,
               decision_date = $3, updated_at = now() WHERE id = $4""",
            d["outcome"], word_count, d["decision_date"], decision_id,
        )
        print(f" החלטה עודכנה: {decision_id}")
        return decision_id

    decision_id = await conn.fetchval(
        """INSERT INTO decisions (case_id, version, status, outcome, outcome_summary,
                                  total_words, decision_date, author)
           VALUES ($1, 1, 'final', $2, $3, $4, $5, 'דפנה תמיר')
           RETURNING id""",
        case_id, d["outcome"], d["title"], word_count, d["decision_date"],
    )
    print(f" החלטה נוצרה: {decision_id}")
    return decision_id


async def main():
    """Import every entry of FINAL_DECISIONS into the database.

    For each entry: skip it if the file is missing, extract its text, and
    skip it if no `cases` row matches its case number. Otherwise upsert the
    `documents` row, upsert the `decisions` row as final, and set the case
    status to final. A summary of the stored totals is printed at the end.

    The connection pool is closed even when an import step raises.
    """
    await init_schema()
    pool = await get_pool()

    try:
        for d in FINAL_DECISIONS:
            file_path = PROJECT_ROOT / d["file_path"]
            if not file_path.exists():
                print(f"❌ קובץ לא נמצא: {file_path}")
                continue

            # Extract text
            print(f"\nמחלץ טקסט: {d['title']}...")
            text = extract_text(file_path)
            word_count = count_words(text)
            print(f" {word_count} מילים, {len(text)} תווים")

            async with pool.acquire() as conn:
                # One transaction per decision: the document, decision and
                # case rows are written together or not at all.
                async with conn.transaction():
                    case_id = await conn.fetchval(
                        "SELECT id FROM cases WHERE case_number = $1", d["case_number"]
                    )
                    if not case_id:
                        print(f" ⚠ תיק {d['case_number']} לא נמצא ב-DB — מדלג")
                        continue

                    await _store_document(conn, case_id, d, file_path, text)
                    await _store_decision(conn, case_id, d, word_count)

                    # Update case status
                    await conn.execute(
                        "UPDATE cases SET status = 'final', expected_outcome = $1, updated_at = now() WHERE id = $2",
                        d["outcome"], case_id,
                    )

            print(f" ✅ הושלם: {d['case_number']}")

        # Summary
        async with pool.acquire() as conn:
            doc_count = await conn.fetchval(
                "SELECT count(*) FROM documents WHERE doc_type = 'decision' AND extraction_status = 'completed'"
            )
            dec_count = await conn.fetchval(
                "SELECT count(*) FROM decisions WHERE status = 'final'"
            )
            total_words = await conn.fetchval(
                "SELECT sum(total_words) FROM decisions WHERE status = 'final'"
            )
    finally:
        await close_pool()

    # sum() yields NULL (None) when no row matches; None cannot take the ","
    # format spec, so fall back to 0.
    total_words = total_words or 0

    print(f"\n{'='*50}")
    print(f"✅ סה\"כ מסמכי החלטה: {doc_count}")
    print(f"✅ סה\"כ החלטות סופיות: {dec_count}")
    print(f"✅ סה\"כ מילים: {total_words:,}")


if __name__ == "__main__":
    asyncio.run(main())
|