New services (11 files): - classifier.py: auto doc-type classification + party identification (Claude Haiku) - claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex) - references_extractor.py: plan/case-law/legislation detection (regex) - brainstorm.py: direction generation with 2-3 options (Claude Sonnet) - block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus) - docx_exporter.py: DOCX export with David font, RTL, headings - qa_validator.py: 6 QA checks with export blocking on critical failure - learning_loop.py: draft vs final comparison + lesson extraction - metrics.py: KPIs dashboard per case and global - audit.py: action audit log - cli.py: standalone CLI with 11 commands Updated pipeline: extract → classify → chunk → embed → store → extract_references New MCP tools: 29 total (was 16) New DB tables: audit_log, decisions CRUD, claims CRUD Config: Infisical support, external service allowlist Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
275 lines
8.8 KiB
Python
275 lines
8.8 KiB
Python
"""ייצוא החלטת ועדת ערר ל-DOCX מעוצב.
|
|
|
|
דרישות: גופן David, RTL מלא, כותרות, מספור סעיפים רציף.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from datetime import date
|
|
from pathlib import Path
|
|
from uuid import UUID
|
|
|
|
from docx import Document
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
from docx.oxml import OxmlElement
|
|
from docx.oxml.ns import qn
|
|
from docx.shared import Cm, Pt, RGBColor
|
|
|
|
from legal_mcp import config
|
|
from legal_mcp.services import db
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# ── Constants ─────────────────────────────────────────────────────

# Typeface applied to every run written by this module.
FONT_NAME = "David"

# Font sizes for body text, the decision title, and section headings.
FONT_SIZE_BODY = Pt(12)
FONT_SIZE_TITLE = Pt(16)
FONT_SIZE_HEADING = Pt(14)

# Line-spacing multiplier applied to each paragraph.
LINE_SPACING = 1.5

# Margin used on all four sides of every section.
PAGE_MARGIN = Cm(2.5)
|
|
|
|
|
|
# ── RTL helpers ───────────────────────────────────────────────────
|
|
|
|
def _set_rtl_paragraph(paragraph) -> None:
    """Mark *paragraph* as right-to-left via ``<w:bidi w:val="1"/>``.

    The children of ``w:pPr`` form an ordered sequence, so the element is
    inserted at its schema position rather than blindly appended. An
    existing ``w:bidi`` is reused, so repeated calls on the same paragraph
    never create duplicates.

    Args:
        paragraph: python-docx paragraph whose properties are updated in place.
    """
    pPr = paragraph._element.get_or_add_pPr()
    bidi = pPr.find(qn("w:bidi"))
    if bidi is None:
        bidi = OxmlElement("w:bidi")
        # Siblings that must come after w:bidi inside w:pPr.
        pPr.insert_element_before(
            bidi,
            "w:adjustRightInd", "w:snapToGrid", "w:spacing", "w:ind",
            "w:contextualSpacing", "w:mirrorIndents", "w:suppressOverlap",
            "w:jc", "w:textDirection", "w:textAlignment",
            "w:textboxTightWrap", "w:outlineLvl", "w:divId", "w:cnfStyle",
            "w:rPr", "w:sectPr", "w:pPrChange",
        )
    bidi.set(qn("w:val"), "1")
|
|
|
|
|
|
def _set_rtl_run(run) -> None:
    """Mark *run* as right-to-left via ``<w:rtl w:val="1"/>``.

    The element is inserted at its schema position inside ``w:rPr`` and an
    existing ``w:rtl`` is reused, so repeated calls never create duplicates.

    Args:
        run: python-docx run whose properties are updated in place.
    """
    rPr = run._element.get_or_add_rPr()
    rtl = rPr.find(qn("w:rtl"))
    if rtl is None:
        rtl = OxmlElement("w:rtl")
        # Siblings that must come after w:rtl inside w:rPr.
        rPr.insert_element_before(
            rtl,
            "w:cs", "w:em", "w:lang", "w:eastAsianLayout",
            "w:specVanish", "w:oMath",
        )
    rtl.set(qn("w:val"), "1")
|
|
|
|
|
|
def _set_rtl_section(section) -> None:
    """Mark *section* as right-to-left via ``<w:bidi w:val="1"/>``.

    ``w:sectPr`` children are an ordered sequence and ``w:bidi`` has to
    precede ``w:docGrid``. The element is therefore inserted before its
    successors instead of being appended, and an existing ``w:bidi`` is
    reused so repeated calls never create duplicates.

    Args:
        section: python-docx section whose properties are updated in place.
    """
    sectPr = section._sectPr
    bidi = sectPr.find(qn("w:bidi"))
    if bidi is None:
        bidi = OxmlElement("w:bidi")
        # Siblings that must come after w:bidi inside w:sectPr.
        sectPr.insert_element_before(
            bidi,
            "w:rtlGutter", "w:docGrid", "w:printerSettings", "w:sectPrChange",
        )
    bidi.set(qn("w:val"), "1")
|
|
|
|
|
|
def _add_paragraph(doc, text: str, style: str = "Normal",
                   bold: bool = False, font_size=None,
                   alignment=None, space_after: Pt | None = None) -> None:
    """Append an RTL paragraph with a single run set in the David font.

    Args:
        doc: python-docx document to append to.
        text: Text of the paragraph's single run.
        style: Accepted for interface compatibility; not applied to the
            paragraph.
        bold: Whether the run is bold.
        font_size: Run font size; ``FONT_SIZE_BODY`` when ``None``.
        alignment: Paragraph alignment; right-aligned when ``None``.
        space_after: Optional spacing after the paragraph.
    """
    para = doc.add_paragraph()
    _set_rtl_paragraph(para)

    # Compare against None: WD_ALIGN_PARAGRAPH.LEFT has the value 0 and a
    # plain truthiness test would silently replace it with RIGHT.
    para.alignment = WD_ALIGN_PARAGRAPH.RIGHT if alignment is None else alignment

    run = para.add_run(text)
    font = run.font
    font.name = FONT_NAME
    font.size = FONT_SIZE_BODY if font_size is None else font_size
    run.bold = bold
    # RTL / Hebrew runs are rendered with the complex-script properties, so
    # the bold flag has to be mirrored there as well.
    font.cs_bold = bold
    _set_rtl_run(run)

    rPr = run._element.get_or_add_rPr()
    # font.name only fills the Latin slots of w:rFonts; Hebrew glyphs take
    # their typeface from the w:cs slot.
    rFonts = rPr.find(qn("w:rFonts"))
    if rFonts is not None:
        rFonts.set(qn("w:cs"), FONT_NAME)
    # Likewise w:sz is the Latin size; w:szCs is the complex-script size and
    # must directly follow w:sz in the schema order.
    sz = rPr.find(qn("w:sz"))
    if sz is not None and rPr.find(qn("w:szCs")) is None:
        sz_cs = OxmlElement("w:szCs")
        sz_cs.set(qn("w:val"), sz.get(qn("w:val")))
        sz.addnext(sz_cs)

    # Line spacing
    pf = para.paragraph_format
    pf.line_spacing = LINE_SPACING
    if space_after is not None:
        pf.space_after = space_after
|
|
|
|
|
|
def _add_centered_paragraph(doc, text: str, bold: bool = True,
                            font_size=None) -> None:
    """Append a centered RTL paragraph by delegating to ``_add_paragraph``.

    Args:
        doc: python-docx document to append to.
        text: Paragraph text.
        bold: Whether the text is bold (defaults to ``True``).
        font_size: Font size forwarded unchanged to ``_add_paragraph``.
    """
    centered = WD_ALIGN_PARAGRAPH.CENTER
    _add_paragraph(doc, text, alignment=centered, font_size=font_size, bold=bold)
|
|
|
|
|
|
def _add_blockquote(doc, text: str) -> None:
    """Append an indented, italic, right-aligned RTL paragraph for a quotation.

    Args:
        doc: python-docx document to append to.
        text: Quotation text, already stripped of quote markers by the caller.
    """
    para = doc.add_paragraph()
    _set_rtl_paragraph(para)
    para.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    run = para.add_run(text)
    font = run.font
    font.name = FONT_NAME
    font.size = Pt(11)
    run.italic = True
    # RTL / Hebrew runs are rendered with the complex-script properties, so
    # the italic flag has to be mirrored there as well.
    font.cs_italic = True
    _set_rtl_run(run)

    rPr = run._element.get_or_add_rPr()
    # font.name only fills the Latin slots of w:rFonts; Hebrew glyphs take
    # their typeface from the w:cs slot.
    rFonts = rPr.find(qn("w:rFonts"))
    if rFonts is not None:
        rFonts.set(qn("w:cs"), FONT_NAME)
    # w:szCs is the complex-script size and must directly follow w:sz.
    sz = rPr.find(qn("w:sz"))
    if sz is not None and rPr.find(qn("w:szCs")) is None:
        sz_cs = OxmlElement("w:szCs")
        sz_cs.set(qn("w:val"), sz.get(qn("w:val")))
        sz.addnext(sz_cs)

    pf = para.paragraph_format
    pf.left_indent = Cm(1.5)
    pf.right_indent = Cm(1.5)
    pf.line_spacing = LINE_SPACING
|
|
|
|
|
|
def _add_image_placeholder(doc, description: str) -> None:
    """Append a centered 10pt paragraph showing *description* in square brackets.

    Args:
        doc: python-docx document to append to.
        description: Placeholder caption, without the surrounding brackets.
    """
    label = f"[{description}]"
    _add_paragraph(
        doc,
        label,
        font_size=Pt(10),
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
    )
|
|
|
|
|
|
# ── Main export ───────────────────────────────────────────────────
|
|
|
|
async def export_decision(case_id: UUID, output_path: str | None = None) -> str:
    """Export a case's decision to a DOCX file.

    Loads the case, its decision and the decision's blocks, writes every
    block with non-blank content into a new RTL document, and saves it.

    Args:
        case_id: Identifier of the case.
        output_path: Destination path (optional). When omitted the file is
            written to ``config.CASES_DIR / <case_number> / "output"``.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If the case does not exist, has no decision, or the
            decision has no blocks.
    """
    case = await db.get_case(case_id)
    if not case:
        raise ValueError(f"Case {case_id} not found")

    decision = await db.get_decision_by_case(case_id)
    if not decision:
        raise ValueError(f"No decision for case {case_id}")

    # The id may arrive as a string or already as a UUID depending on how the
    # row was materialised; UUID(<UUID instance>) raises, so coerce via str.
    raw_decision_id = decision["id"]
    if isinstance(raw_decision_id, UUID):
        decision_id = raw_decision_id
    else:
        decision_id = UUID(str(raw_decision_id))

    # Get blocks
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        blocks = await conn.fetch(
            """SELECT block_id, block_index, title, content, word_count
               FROM decision_blocks
               WHERE decision_id = $1
               ORDER BY block_index""",
            decision_id,
        )

    if not blocks:
        raise ValueError("No blocks in decision")

    # Create document
    doc = Document()

    # Set page margins and RTL direction on every section
    for section in doc.sections:
        section.top_margin = PAGE_MARGIN
        section.bottom_margin = PAGE_MARGIN
        section.left_margin = PAGE_MARGIN
        section.right_margin = PAGE_MARGIN
        _set_rtl_section(section)

    # Write blocks, skipping those whose content is missing or blank
    for block in blocks:
        content = block["content"] or ""
        if not content.strip():
            continue
        _write_block_to_docx(doc, block["block_id"], block["title"], content)

    # Determine output path
    if not output_path:
        case_dir = config.CASES_DIR / case["case_number"] / "output"
        case_dir.mkdir(parents=True, exist_ok=True)
        output_path = str(case_dir / f"החלטה-{case['case_number']}.docx")

    # A caller-supplied path may point into a directory that does not exist yet.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    logger.info("DOCX exported: %s", output_path)
    return output_path
|
|
|
|
|
|
def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
    """Render one decision block into *doc* according to its ``block_id``.

    Header and closing blocks (alef, bet, gimel, dalet, yod-bet) get fixed
    centered layouts; every other block is treated as body content and is
    parsed line by line into headings, blockquotes, image placeholders and
    plain paragraphs.

    Args:
        doc: python-docx document to append to.
        block_id: Block identifier, e.g. ``"block-alef"``.
        title: Block title; not used when rendering.
        content: Raw block text with newline-separated lines.
    """
    # Every branch works on the non-blank, stripped lines of the content.
    lines = [candidate.strip() for candidate in content.split("\n")]
    lines = [entry for entry in lines if entry]

    # Blocks that are just centered lines: (leading spacer, bold, font size).
    centered_layouts = {
        "block-alef": (None, True, FONT_SIZE_HEADING),
        "block-bet": (Pt(6), False, FONT_SIZE_BODY),
        "block-yod-bet": (Pt(24), False, FONT_SIZE_BODY),
    }
    if block_id in centered_layouts:
        spacer, is_bold, size = centered_layouts[block_id]
        if spacer is not None:
            _add_paragraph(doc, "", space_after=spacer)  # spacer
        for entry in lines:
            _add_centered_paragraph(doc, entry, bold=is_bold, font_size=size)
        return

    if block_id == "block-gimel":
        _add_paragraph(doc, "", space_after=Pt(6))
        for entry in lines:
            # The bare word "נגד" is rendered as a bold separator line.
            is_separator = entry == "נגד"
            shown = "— נגד —" if is_separator else entry
            _add_centered_paragraph(doc, shown, bold=is_separator,
                                    font_size=FONT_SIZE_BODY)
        return

    if block_id == "block-dalet":
        # Fixed title between two spacers; the block's own content is not written.
        _add_paragraph(doc, "", space_after=Pt(12))  # spacer
        _add_centered_paragraph(doc, "החלטה", bold=True, font_size=FONT_SIZE_TITLE)
        _add_paragraph(doc, "", space_after=Pt(12))
        return

    # Content blocks — classify each line in priority order.
    quote_openers = ('"', "״", ">")
    for entry in lines:
        if _is_section_heading(entry):
            # Section heading (e.g. "תמצית טענות הצדדים", "טענות העוררים")
            _add_paragraph(doc, entry, bold=True, font_size=FONT_SIZE_HEADING,
                           space_after=Pt(6))
        elif entry.startswith(quote_openers):
            # Blockquote: drop the ">" marker and surrounding quote characters
            quoted = entry.lstrip(">").strip().strip('"').strip("״").strip('"')
            _add_blockquote(doc, quoted)
        elif "📷" in entry or (entry.startswith("[") and "תמונה" in entry):
            # Image placeholder
            _add_image_placeholder(doc, entry.strip("[]📷 "))
        else:
            # Regular numbered paragraph or plain text
            _add_paragraph(doc, entry)
|
|
|
|
|
|
def _is_section_heading(text: str) -> bool:
    """Return True when *text* starts with one of the known section-heading phrases.

    Only the fixed phrases below are recognised. Short or unnumbered lines
    that do not start with one of them are not treated as headings.

    Args:
        text: A single stripped line of decision text.

    Returns:
        ``True`` if the line begins with a known heading phrase, else ``False``.
    """
    heading_patterns = (
        r"^תמצית\s+טענות",
        r"^טענות\s+העוררי",
        r"^עמדת\s+הוועדה",
        r"^עמדת\s+מבקשי",
        r"^ההליכים\s+בפני",
        r"^דיון\s+והכרעה",
        r"^סוף\s+דבר",
        r"^סיכום",
        r"^פתח\s+דבר",
        r"^תכניות\s+חלות",
    )
    return any(re.search(pattern, text) for pattern in heading_patterns)
|