Add docs, scripts, skills, commands, and taskmaster config to repo
Includes:
- docs/: architecture, block-schema, migration-plan, product-specification
- scripts/: bidi_table, decompose-decisions, extract-claims, seed-knowledge, etc.
- skill-legal-decision/: SKILL.md + references + block-schema
- skill-legal-assistant/: SKILL.md
- skill-legal-docx/: SKILL.md + references
- .claude/commands/: bidi-table skill
- .taskmaster/: task config + PRDs
- .gitignore: exclude legacy/, kiryat-yearim/, node_modules/, memory/

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
202
scripts/import-final-decisions.py
Normal file
202
scripts/import-final-decisions.py
Normal file
@@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Import 6 final signed decisions: extract text, store in DB."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
import fitz # PyMuPDF
|
||||
from docx import Document as DocxDocument
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# 6 Final Decisions
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
# Each entry describes one decision file to import:
#   case_number   - looked up against cases.case_number by main()
#   file_path     - location of the decision file, relative to PROJECT_ROOT
#   title         - stored as the document title (and as outcome_summary
#                   when a new decision row is created)
#   outcome       - written to decisions.outcome / cases.expected_outcome;
#                   an empty string is passed through as-is
#   decision_date - a datetime.date, or None when no date is given here
FINAL_DECISIONS = [
    {
        "case_number": "1180-1181",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר 1180-1181 הכט/החלטה/הכט 1180-1181.pdf",
        "title": "החלטה סופית — הכט 1180-1181",
        "outcome": "rejected",
        "decision_date": date(2026, 2, 5),
    },
    {
        "case_number": "8255-25",
        "file_path": "legacy/dafna-tamir/04_Archive/בל\"מ 8255-25 אפרים אבי נ' הוועדה המקומית לתכנון ובניה/החלטה/אליהו הרנון - להפצה.docx",
        "title": "החלטה סופית — אפרים אבי 8255-25",
        "outcome": "rejected",
        "decision_date": None,
    },
    {
        "case_number": "8007-24",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר 8007-24-עומר דרוויש-ערר על שומה מכרעת/החלטה/החלטה-סופית.docx",
        "title": "החלטה סופית — עומר דרוויש 8007-24",
        "outcome": "",
        "decision_date": None,
    },
    {
        "case_number": "1113/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1113-25-אייל-מבורך/החלטה/החלטה-1113-25-טיוטה-סופית.docx",
        "title": "החלטה סופית — מבורך 1113-25",
        "outcome": "",
        "decision_date": None,
    },
    {
        "case_number": "1126/25+1141/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1126-25-תמא-38-בית-הכרם/החלטה/בית הכרם-טיוטת החלטה-9.pdf",
        "title": "החלטה סופית — בית הכרם 1126/25",
        "outcome": "partial",
        "decision_date": date(2026, 3, 1),
    },
    {
        "case_number": "1128/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1128-25-שטרית/החלטה/1128-25 החלטה להפצה.pdf",
        "title": "החלטה סופית — שטרית 1128-25",
        "outcome": "",
        "decision_date": None,
    },
]

# Two directory levels above this script; file_path entries are joined to it.
PROJECT_ROOT = Path(__file__).parent.parent
|
||||
|
||||
|
||||
def extract_pdf_text(file_path: Path) -> str:
    """Extract the plain text of every page of a PDF using PyMuPDF.

    Args:
        file_path: Location of the PDF file to read.

    Returns:
        The text of all pages, in page order, joined with newlines.
    """
    # Open via the context manager so the document is closed even when a
    # page fails to extract; previously it was closed only on success.
    with fitz.open(str(file_path)) as doc:
        return "\n".join(page.get_text() for page in doc)
|
||||
|
||||
|
||||
def extract_docx_text(file_path: Path) -> str:
    """Return the text of a DOCX file, one non-blank paragraph per line.

    Iterates ``paragraphs`` of the opened document, drops paragraphs whose
    text is empty or whitespace-only, and joins the rest with newlines.
    """
    document = DocxDocument(str(file_path))
    kept = []
    for paragraph in document.paragraphs:
        if not paragraph.text.strip():
            continue
        kept.append(paragraph.text)
    return "\n".join(kept)
|
||||
|
||||
|
||||
def extract_text(file_path: Path) -> str:
    """Extract text from a file, picking the extractor by its extension.

    The extension is compared case-insensitively: ``.pdf`` is handled by
    ``extract_pdf_text`` and ``.docx`` by ``extract_docx_text``.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    extension = file_path.suffix.lower()
    # Reject unknown formats up front, then dispatch on the two known ones.
    if extension not in (".pdf", ".docx"):
        raise ValueError(f"Unsupported format: {extension}")
    if extension == ".pdf":
        return extract_pdf_text(file_path)
    return extract_docx_text(file_path)
|
||||
|
||||
|
||||
def count_words(text: str) -> int:
    """Count the whitespace-separated tokens in *text*."""
    tokens = text.split()
    return len(tokens)
|
||||
|
||||
|
||||
def _pdf_page_count(file_path: Path) -> "int | None":
    """Return the number of pages in a PDF, or None for any other file type."""
    # Lower-case the suffix so this check agrees with extract_text(), which
    # also treats ".PDF" as a PDF.
    if file_path.suffix.lower() != ".pdf":
        return None
    # Context manager so the document handle is released after counting.
    with fitz.open(str(file_path)) as pdf:
        return len(pdf)


async def main():
    """Import every entry of FINAL_DECISIONS into the database.

    For each entry the decision file is located under PROJECT_ROOT, its text
    is extracted and word-counted, and then, for the matching row in
    ``cases``:

    * the ``documents`` row with the same file path is updated with the
      extracted text, or a new 'decision' document is inserted;
    * the ``decisions`` row for the case is updated to status 'final', or a
      new version-1 final decision is inserted;
    * the case itself is marked 'final' with the entry's outcome.

    Entries whose file is missing, or whose case number has no row in
    ``cases``, are reported and skipped. A summary of decision documents,
    final decisions and total words is printed at the end.
    """
    await init_schema()
    pool = await get_pool()

    # try/finally so the pool is closed even if an extraction or a query fails.
    try:
        for d in FINAL_DECISIONS:
            file_path = PROJECT_ROOT / d["file_path"]
            if not file_path.exists():
                print(f"❌ קובץ לא נמצא: {file_path}")
                continue

            # Extract text
            print(f"\nמחלץ טקסט: {d['title']}...")
            text = extract_text(file_path)
            word_count = count_words(text)
            print(f" {word_count} מילים, {len(text)} תווים")

            async with pool.acquire() as conn:
                # One transaction per decision: the document, decision and
                # case writes either all land or none do.
                # NOTE(review): assumes the pool hands out asyncpg connections
                # (the $1 placeholders and fetchval suggest so) - confirm.
                async with conn.transaction():
                    # Get case_id
                    case_id = await conn.fetchval(
                        "SELECT id FROM cases WHERE case_number = $1",
                        d["case_number"],
                    )
                    if not case_id:
                        print(f" ⚠ תיק {d['case_number']} לא נמצא ב-DB — מדלג")
                        continue

                    # Register document (documents are matched on file path)
                    existing_doc = await conn.fetchval(
                        "SELECT id FROM documents WHERE file_path = $1",
                        str(file_path),
                    )
                    if existing_doc:
                        doc_id = existing_doc
                        print(f" מסמך כבר קיים ב-DB: {doc_id}")
                        # Update text
                        await conn.execute(
                            """UPDATE documents SET extracted_text = $1, extraction_status = 'completed'
                            WHERE id = $2""",
                            text, doc_id,
                        )
                    else:
                        doc_id = await conn.fetchval(
                            """INSERT INTO documents (case_id, doc_type, title, file_path, extracted_text, extraction_status, page_count)
                            VALUES ($1, 'decision', $2, $3, $4, 'completed', $5)
                            RETURNING id""",
                            case_id, d["title"], str(file_path), text,
                            _pdf_page_count(file_path),
                        )
                        print(f" מסמך נרשם: {doc_id}")

                    # Create/update decision record
                    # NOTE(review): if a case can have several decision rows,
                    # this picks whichever one the query returns first.
                    existing_decision = await conn.fetchval(
                        "SELECT id FROM decisions WHERE case_id = $1", case_id
                    )
                    if existing_decision:
                        await conn.execute(
                            """UPDATE decisions SET status = 'final', outcome = $1, total_words = $2,
                            decision_date = $3, updated_at = now() WHERE id = $4""",
                            d["outcome"], word_count, d["decision_date"], existing_decision,
                        )
                        decision_id = existing_decision
                        print(f" החלטה עודכנה: {decision_id}")
                    else:
                        decision_id = await conn.fetchval(
                            """INSERT INTO decisions (case_id, version, status, outcome, outcome_summary,
                            total_words, decision_date, author)
                            VALUES ($1, 1, 'final', $2, $3, $4, $5, 'דפנה תמיר')
                            RETURNING id""",
                            case_id, d["outcome"], d["title"], word_count, d["decision_date"],
                        )
                        print(f" החלטה נוצרה: {decision_id}")

                    # Update case status
                    await conn.execute(
                        "UPDATE cases SET status = 'final', expected_outcome = $1, updated_at = now() WHERE id = $2",
                        d["outcome"], case_id,
                    )

            # Reported only after the transaction above has committed.
            print(f" ✅ הושלם: {d['case_number']}")

        # Summary
        async with pool.acquire() as conn:
            doc_count = await conn.fetchval(
                "SELECT count(*) FROM documents WHERE doc_type = 'decision' AND extraction_status = 'completed'"
            )
            dec_count = await conn.fetchval(
                "SELECT count(*) FROM decisions WHERE status = 'final'"
            )
            total_words = await conn.fetchval(
                "SELECT sum(total_words) FROM decisions WHERE status = 'final'"
            )
    finally:
        await close_pool()

    # sum() over zero rows yields NULL/None, which the ',' format spec rejects.
    total_words = total_words or 0

    print(f"\n{'='*50}")
    print(f"✅ סה\"כ מסמכי החלטה: {doc_count}")
    print(f"✅ סה\"כ החלטות סופיות: {dec_count}")
    print(f"✅ סה\"כ מילים: {total_words:,}")
|
||||
|
||||
|
||||
# Script entry point: run the async importer when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user