Files
legal-ai/mcp-server/src/legal_mcp/services/docx_exporter.py
Chaim d9e5ef0f46 Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export
New services (11 files):
- classifier.py: auto doc-type classification + party identification (Claude Haiku)
- claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex)
- references_extractor.py: plan/case-law/legislation detection (regex)
- brainstorm.py: direction generation with 2-3 options (Claude Sonnet)
- block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus)
- docx_exporter.py: DOCX export with David font, RTL, headings
- qa_validator.py: 6 QA checks with export blocking on critical failure
- learning_loop.py: draft vs final comparison + lesson extraction
- metrics.py: KPIs dashboard per case and global
- audit.py: action audit log
- cli.py: standalone CLI with 11 commands

Updated pipeline: extract → classify → chunk → embed → store → extract_references
New MCP tools: 29 total (was 16)
New DB tables: audit_log, decisions CRUD, claims CRUD
Config: Infisical support, external service allowlist

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 10:21:47 +00:00

275 lines
8.8 KiB
Python

"""Export an appeals-committee decision to a formatted DOCX file.

Requirements: David font, full RTL, headings, continuous section numbering.
"""
from __future__ import annotations
import logging
import re
from datetime import date
from pathlib import Path
from uuid import UUID
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
from legal_mcp import config
from legal_mcp.services import db
# Module-level logger; export_decision() logs the path of every exported file.
logger = logging.getLogger(__name__)
# ── Constants ─────────────────────────────────────────────────────
# Font face assigned to every run this module creates.
FONT_NAME = "David"
# Default run size when a caller does not pass an explicit font_size.
FONT_SIZE_BODY = Pt(12)
# Size of the centered "החלטה" title written for block-dalet.
FONT_SIZE_TITLE = Pt(16)
# Size of section headings and of the block-alef header lines.
FONT_SIZE_HEADING = Pt(14)
# Value assigned to paragraph_format.line_spacing of every paragraph.
LINE_SPACING = 1.5
# Applied to all four page margins of every section.
PAGE_MARGIN = Cm(2.5)
# ── RTL helpers ───────────────────────────────────────────────────
def _set_rtl_paragraph(paragraph) -> None:
    """Mark *paragraph* as right-to-left.

    Builds a ``<w:bidi w:val="1"/>`` element and appends it to the
    paragraph properties (``w:pPr``), which are created if missing.
    """
    paragraph_props = paragraph._element.get_or_add_pPr()
    bidi_flag = OxmlElement("w:bidi")
    bidi_flag.set(qn("w:val"), "1")
    paragraph_props.append(bidi_flag)
def _set_rtl_run(run) -> None:
    """Mark *run* as right-to-left and mirror its formatting for complex scripts.

    WordprocessingML formats the characters of an ``rtl`` run with the
    complex-script properties (``rFonts/@w:cs``, ``w:bCs``, ``w:iCs``,
    ``w:szCs``) rather than the Latin ones.  Setting only ``w:rtl`` would
    therefore make the font face, size, bold and italic chosen by the caller
    ineffective for Hebrew text.  This helper copies whatever Latin
    properties are already present on the run into their complex-script
    counterparts, so it must be called AFTER the run's font, size, bold and
    italic have been set.

    The function is idempotent: existing complex-script properties are left
    untouched and a second ``w:rtl`` element is never added.
    """
    rPr = run._element.get_or_add_rPr()

    # Font face: reuse the Latin (ascii) face for complex-script characters.
    rFonts = rPr.find(qn("w:rFonts"))
    if rFonts is not None:
        latin_face = rFonts.get(qn("w:ascii"))
        if latin_face and rFonts.get(qn("w:cs")) is None:
            rFonts.set(qn("w:cs"), latin_face)

    # Bold / italic / size: each complex-script twin directly follows its
    # Latin sibling in the rPr schema sequence, so insert it right after.
    for latin_tag, cs_tag in (("w:b", "w:bCs"), ("w:i", "w:iCs"), ("w:sz", "w:szCs")):
        latin = rPr.find(qn(latin_tag))
        if latin is None or rPr.find(qn(cs_tag)) is not None:
            continue
        twin = OxmlElement(cs_tag)
        val = latin.get(qn("w:val"))
        if val is not None:
            # An absent w:val means "on" for toggles; copy it only when explicit.
            twin.set(qn("w:val"), val)
        latin.addnext(twin)

    rtl = rPr.find(qn("w:rtl"))
    if rtl is None:
        rtl = OxmlElement("w:rtl")
        rPr.append(rtl)
    rtl.set(qn("w:val"), "1")
def _set_rtl_section(section) -> None:
    """Mark *section* as right-to-left (``<w:bidi w:val="1"/>`` in ``w:sectPr``).

    The children of ``w:sectPr`` form an ordered schema sequence in which
    ``w:bidi`` precedes ``w:rtlGutter``, ``w:docGrid``, ``w:printerSettings``
    and ``w:sectPrChange``.  Blindly appending the element would place it
    after an existing ``w:docGrid``, so it is inserted before the first of
    those successors that is present and appended only when none exists.

    Calling the function again reuses the existing ``w:bidi`` element
    instead of adding a duplicate.
    """
    sectPr = section._sectPr
    bidi = sectPr.find(qn("w:bidi"))
    if bidi is None:
        bidi = OxmlElement("w:bidi")
        for successor_tag in ("w:rtlGutter", "w:docGrid",
                              "w:printerSettings", "w:sectPrChange"):
            successor = sectPr.find(qn(successor_tag))
            if successor is not None:
                successor.addprevious(bidi)
                break
        else:
            # No later sibling exists, so the end of sectPr is the right place.
            sectPr.append(bidi)
    bidi.set(qn("w:val"), "1")
def _add_paragraph(doc, text: str, style: str = "Normal",
                   bold: bool = False, font_size=None,
                   alignment=None, space_after: Pt | None = None) -> None:
    """Append an RTL paragraph containing a single David-font run.

    Args:
        doc: Object exposing ``add_paragraph()`` (the document being built).
        text: Text of the paragraph's single run; may be empty for a spacer.
        style: Paragraph style name.  The default keeps the style-less
            ``add_paragraph()`` call; any other value is passed through as
            ``add_paragraph(style=...)``.
        bold: Whether the run is bold.
        font_size: Run size; ``FONT_SIZE_BODY`` when ``None``.
        alignment: Paragraph alignment; ``WD_ALIGN_PARAGRAPH.RIGHT`` when
            ``None``.
        space_after: Spacing after the paragraph; left untouched when ``None``.
    """
    para = doc.add_paragraph() if style == "Normal" else doc.add_paragraph(style=style)
    _set_rtl_paragraph(para)

    # Compare against None explicitly: WD_ALIGN_PARAGRAPH.LEFT has the value
    # 0 and a truthiness test would silently replace it with RIGHT.
    # NOTE(review): Word may resolve left/right justification relative to the
    # paragraph direction for bidi paragraphs — verify the rendered alignment.
    para.alignment = WD_ALIGN_PARAGRAPH.RIGHT if alignment is None else alignment

    run = para.add_run(text)
    run.font.name = FONT_NAME
    run.font.size = FONT_SIZE_BODY if font_size is None else font_size
    run.bold = bold
    # Must come after the font settings above (see the call order here).
    _set_rtl_run(run)

    # Line spacing
    pf = para.paragraph_format
    pf.line_spacing = LINE_SPACING
    if space_after is not None:
        pf.space_after = space_after
def _add_centered_paragraph(doc, text: str, bold: bool = True,
                            font_size=None) -> None:
    """Append a centered RTL paragraph (bold unless *bold* is false).

    Thin wrapper around ``_add_paragraph`` that fixes the alignment to
    ``WD_ALIGN_PARAGRAPH.CENTER`` and forwards everything else.
    """
    _add_paragraph(
        doc,
        text,
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
        font_size=font_size,
        bold=bold,
    )
def _add_blockquote(doc, text: str) -> None:
    """Append *text* as an italic, 11pt, right-aligned RTL quotation.

    The paragraph is indented 1.5 cm on both sides and uses the module's
    standard line spacing.
    """
    quote = doc.add_paragraph()
    _set_rtl_paragraph(quote)
    quote.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    quote_run = quote.add_run(text)
    quote_run.font.name = FONT_NAME
    quote_run.font.size = Pt(11)
    quote_run.italic = True
    _set_rtl_run(quote_run)

    layout = quote.paragraph_format
    side_indent = Cm(1.5)
    layout.left_indent = side_indent
    layout.right_indent = side_indent
    layout.line_spacing = LINE_SPACING
def _add_image_placeholder(doc, description: str) -> None:
    """Append a centered 10pt line reading ``[<description>]``.

    Stands in for an image that is not embedded in the exported document.
    """
    placeholder = f"[{description}]"
    _add_paragraph(
        doc,
        placeholder,
        font_size=Pt(10),
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
    )
# ── Main export ───────────────────────────────────────────────────
async def export_decision(case_id: UUID, output_path: str | None = None) -> str:
    """Export the decision of a case to a DOCX file.

    Args:
        case_id: Identifier of the case.
        output_path: Destination path (optional).  When omitted, the file is
            written under ``config.CASES_DIR / <case_number> / "output"``.

    Returns:
        Path of the file that was created.

    Raises:
        ValueError: If the case does not exist, has no decision, or the
            decision has no blocks.
    """
    case = await db.get_case(case_id)
    if not case:
        raise ValueError(f"Case {case_id} not found")

    decision = await db.get_decision_by_case(case_id)
    if not decision:
        raise ValueError(f"No decision for case {case_id}")

    # Fetch the decision's blocks in their stored order.
    blocks_query = (
        "SELECT block_id, block_index, title, content, word_count\n"
        "FROM decision_blocks\n"
        "WHERE decision_id = $1\n"
        "ORDER BY block_index"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        blocks = await conn.fetch(blocks_query, UUID(decision["id"]))
    if not blocks:
        raise ValueError("No blocks in decision")

    doc = Document()

    # Uniform margins and RTL direction on every section.
    for section in doc.sections:
        for side in ("top", "bottom", "left", "right"):
            setattr(section, f"{side}_margin", PAGE_MARGIN)
        _set_rtl_section(section)

    # Blocks whose content is empty or whitespace-only are skipped.
    for row in blocks:
        block_id = row["block_id"]
        body = row["content"] or ""
        if body.strip():
            _write_block_to_docx(doc, block_id, row["title"], body)

    # Fall back to the per-case output directory when no path was given.
    if not output_path:
        case_number = case["case_number"]
        case_dir = config.CASES_DIR / case_number / "output"
        case_dir.mkdir(parents=True, exist_ok=True)
        output_path = str(case_dir / f"החלטה-{case_number}.docx")

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    logger.info("DOCX exported: %s", output_path)
    return output_path
def _add_centered_lines(doc, content: str, bold: bool, font_size) -> None:
    """Write every non-blank line of *content* as its own centered paragraph."""
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if line:
            _add_centered_paragraph(doc, line, bold=bold, font_size=font_size)


def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
    """Render one decision block into *doc*.

    Header and signature blocks (``block-alef``, ``block-bet``,
    ``block-gimel``, ``block-dalet``, ``block-yod-bet``) get fixed centered
    layouts.  Every other block is treated as body text and split into
    lines, each of which becomes a section heading, a blockquote, an image
    placeholder or a regular paragraph.

    Args:
        doc: Document being built.
        block_id: Identifier selecting the layout.
        title: Block title; currently not used by this function.
        content: Raw block text, one paragraph per line.
    """
    # ── Header / signature blocks ──
    if block_id == "block-alef":
        _add_centered_lines(doc, content, True, FONT_SIZE_HEADING)
        return

    if block_id == "block-bet":
        _add_paragraph(doc, "", space_after=Pt(6))  # spacer
        _add_centered_lines(doc, content, False, FONT_SIZE_BODY)
        return

    if block_id == "block-gimel":
        _add_paragraph(doc, "", space_after=Pt(6))
        for raw_line in content.split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            # A bare "נגד" line is rendered as a bold separator with dashes.
            is_versus = line == "נגד"
            _add_centered_paragraph(
                doc,
                "— נגד —" if is_versus else line,
                bold=is_versus,
                font_size=FONT_SIZE_BODY,
            )
        return

    if block_id == "block-dalet":
        # The block's content is ignored; only the fixed title is written.
        _add_paragraph(doc, "", space_after=Pt(12))  # spacer
        _add_centered_paragraph(doc, "החלטה", bold=True, font_size=FONT_SIZE_TITLE)
        _add_paragraph(doc, "", space_after=Pt(12))
        return

    if block_id == "block-yod-bet":
        _add_paragraph(doc, "", space_after=Pt(24))  # spacer
        _add_centered_lines(doc, content, False, FONT_SIZE_BODY)
        return

    # ── Content blocks: classify each non-blank line ──
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        if _is_section_heading(line):
            # Known section heading (e.g. "תמצית טענות הצדדים").
            _add_paragraph(doc, line, bold=True, font_size=FONT_SIZE_HEADING,
                           space_after=Pt(6))
        elif line.startswith(('"', "״", ">")):
            # Quotation: drop the leading ">" marker and surrounding quote marks.
            quoted = line.lstrip(">").strip().strip('"').strip("״").strip('"')
            _add_blockquote(doc, quoted)
        elif "📷" in line or (line.startswith("[") and "תמונה" in line):
            # Image placeholder: camera emoji, or a bracketed line mentioning "תמונה".
            _add_image_placeholder(doc, line.strip("[]📷 "))
        else:
            # Regular numbered paragraph or plain text.
            _add_paragraph(doc, line)
def _is_section_heading(text: str) -> bool:
"""Detect section headings in decision text."""
heading_patterns = [
r"^תמצית\s+טענות",
r"^טענות\s+העוררי",
r"^עמדת\s+הוועדה",
r"^עמדת\s+מבקשי",
r"^ההליכים\s+בפני",
r"^דיון\s+והכרעה",
r"^סוף\s+דבר",
r"^סיכום",
r"^פתח\s+דבר",
r"^תכניות\s+חלות",
]
for pattern in heading_patterns:
if re.search(pattern, text):
return True
# Short bold-like lines (under 60 chars, not numbered)
if len(text) < 60 and not re.match(r"^\d+\.", text):
return False
return False