Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export

New services (11 files): - classifier.py: auto doc-type classification + party identification (Claude Haiku) - claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex) - references_extractor.py: plan/case-law/legislation detection (regex) - brainstorm.py: direction generation with 2-3 options (Claude Sonnet) - block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus) - docx_exporter.py: DOCX export with David font, RTL, headings - qa_validator.py: 6 QA checks with export blocking on critical failure - learning_loop.py: draft vs final comparison + lesson extraction - metrics.py: KPIs dashboard per case and global - audit.py: action audit log - cli.py: standalone CLI with 11 commands Updated pipeline: extract → classify → chunk → embed → store → extract_references New MCP tools: 29 total (was 16) New DB tables: audit_log, decisions CRUD, claims CRUD Config: Infisical support, external service allowlist Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 10:21:47 +00:00
parent df7cc4f5a5
commit d9e5ef0f46
21 changed files with 3957 additions and 14 deletions
--- a/mcp-server/src/legal_mcp/services/docx_exporter.py
+++ b/mcp-server/src/legal_mcp/services/docx_exporter.py
@@ -0,0 +1,274 @@
+"""ייצוא החלטת ועדת ערר ל-DOCX מעוצב.
+
+דרישות: גופן David, RTL מלא, כותרות, מספור סעיפים רציף.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from datetime import date
+from pathlib import Path
+from uuid import UUID
+
+from docx import Document
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from docx.oxml import OxmlElement
+from docx.oxml.ns import qn
+from docx.shared import Cm, Pt, RGBColor
+
+from legal_mcp import config
+from legal_mcp.services import db
+
+# Module-level logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# ── Constants ─────────────────────────────────────────────────────
+
+# Typeface assigned to every run written by this module.
+FONT_NAME = "David"
+# Point sizes for body text, the decision title and section headings.
+FONT_SIZE_BODY = Pt(12)
+FONT_SIZE_TITLE = Pt(16)
+FONT_SIZE_HEADING = Pt(14)
+# Value assigned to paragraph_format.line_spacing for every paragraph.
+LINE_SPACING = 1.5
+# Margin applied to all four page edges of each section.
+PAGE_MARGIN = Cm(2.5)
+
+
+# ── RTL helpers ───────────────────────────────────────────────────
+
+def _set_rtl_paragraph(paragraph) -> None:
+    """Flag *paragraph* as bidirectional.
+
+    Appends a ``<w:bidi w:val="1"/>`` child to the paragraph's ``w:pPr``
+    element, creating ``w:pPr`` first when the paragraph has none.
+    """
+    bidi_flag = OxmlElement("w:bidi")
+    bidi_flag.set(qn("w:val"), "1")
+    paragraph._element.get_or_add_pPr().append(bidi_flag)
+
+
+def _set_rtl_run(run) -> None:
+    """Flag *run* as right-to-left.
+
+    Appends a ``<w:rtl w:val="1"/>`` child to the run's ``w:rPr`` element,
+    creating ``w:rPr`` first when the run has none.
+    """
+    rtl_flag = OxmlElement("w:rtl")
+    rtl_flag.set(qn("w:val"), "1")
+    run._element.get_or_add_rPr().append(rtl_flag)
+
+
+def _set_rtl_section(section) -> None:
+    """Flag *section* as right-to-left by adding ``<w:bidi w:val="1"/>``.
+
+    ``w:sectPr`` children follow a fixed schema order in which ``w:bidi``
+    precedes ``w:rtlGutter``, ``w:docGrid``, ``w:printerSettings`` and
+    ``w:sectPrChange``.  The element is therefore inserted in front of the
+    first of those that is present instead of being appended after them.
+    Calling the function again on the same section reuses the existing
+    element rather than adding a duplicate.
+    """
+    sectPr = section._sectPr
+
+    bidi = sectPr.find(qn("w:bidi"))
+    if bidi is not None:
+        bidi.set(qn("w:val"), "1")
+        return
+
+    bidi = OxmlElement("w:bidi")
+    bidi.set(qn("w:val"), "1")
+    for tag in ("w:rtlGutter", "w:docGrid", "w:printerSettings", "w:sectPrChange"):
+        successor = sectPr.find(qn(tag))
+        if successor is not None:
+            successor.addprevious(bidi)
+            return
+    # None of the later siblings exist, so the end of sectPr is a valid spot.
+    sectPr.append(bidi)
+
+
+def _add_paragraph(doc, text: str, style: str = "Normal",
+                   bold: bool = False, font_size=None,
+                   alignment=None, space_after: Pt | None = None) -> None:
+    """Append an RTL paragraph set in the David font to *doc*.
+
+    Args:
+        doc: Document object exposing ``add_paragraph()``.
+        text: Paragraph text, written as a single run.
+        style: Accepted for call compatibility; not applied to the paragraph.
+        bold: Whether the run is bold.
+        font_size: Run size; a falsy value selects ``FONT_SIZE_BODY``.
+        alignment: ``WD_ALIGN_PARAGRAPH`` member; ``None`` selects ``RIGHT``.
+        space_after: Spacing after the paragraph; left untouched when ``None``.
+    """
+    para = doc.add_paragraph()
+    _set_rtl_paragraph(para)
+
+    # Compare against None: WD_ALIGN_PARAGRAPH.LEFT has the value 0, so a
+    # truthiness test would silently replace an explicit LEFT with RIGHT.
+    # NOTE(review): Word may resolve w:jc relative to the paragraph direction
+    # once w:bidi is set — confirm RIGHT renders on the right-hand edge.
+    para.alignment = WD_ALIGN_PARAGRAPH.RIGHT if alignment is None else alignment
+
+    size = font_size or FONT_SIZE_BODY
+    run = para.add_run(text)
+    run.font.name = FONT_NAME
+    run.font.size = size
+    run.bold = bold
+
+    # Hebrew is complex-script text: Word formats it from the complex-script
+    # counterparts (rFonts@cs, szCs, bCs), not from the Latin properties set
+    # above, so each of them is mirrored here.
+    run.font.cs_bold = bold
+    rPr = run._element.get_or_add_rPr()
+    rPr.find(qn("w:rFonts")).set(qn("w:cs"), FONT_NAME)
+    sz_cs = OxmlElement("w:szCs")
+    sz_cs.set(qn("w:val"), str(int(size.pt * 2)))  # w:szCs is in half-points
+    rPr.find(qn("w:sz")).addnext(sz_cs)  # schema order: szCs directly after sz
+
+    _set_rtl_run(run)
+
+    # Line spacing
+    pf = para.paragraph_format
+    pf.line_spacing = LINE_SPACING
+    if space_after is not None:
+        pf.space_after = space_after
+
+
+def _add_centered_paragraph(doc, text: str, bold: bool = True,
+                            font_size=None) -> None:
+    """Append a centre-aligned RTL paragraph to *doc*.
+
+    Thin wrapper around ``_add_paragraph`` that fixes the alignment to
+    ``WD_ALIGN_PARAGRAPH.CENTER``; *bold* defaults to ``True`` here.
+    """
+    centered = WD_ALIGN_PARAGRAPH.CENTER
+    _add_paragraph(doc, text, alignment=centered, font_size=font_size, bold=bold)
+
+
+def _add_blockquote(doc, text: str) -> None:
+    """Append *text* to *doc* as an italic, indented RTL quotation.
+
+    The paragraph is right-aligned, indented 1.5 cm on both sides and set in
+    11 pt David italic with the module-wide line spacing.
+    """
+    para = doc.add_paragraph()
+    _set_rtl_paragraph(para)
+    para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+
+    quote_size = Pt(11)
+    run = para.add_run(text)
+    run.font.name = FONT_NAME
+    run.font.size = quote_size
+    run.italic = True
+
+    # Hebrew is complex-script text: Word formats it from the complex-script
+    # counterparts (rFonts@cs, szCs, iCs), not from the Latin properties set
+    # above, so each of them is mirrored here.
+    run.font.cs_italic = True
+    rPr = run._element.get_or_add_rPr()
+    rPr.find(qn("w:rFonts")).set(qn("w:cs"), FONT_NAME)
+    sz_cs = OxmlElement("w:szCs")
+    sz_cs.set(qn("w:val"), str(int(quote_size.pt * 2)))  # half-points
+    rPr.find(qn("w:sz")).addnext(sz_cs)  # schema order: szCs directly after sz
+
+    _set_rtl_run(run)
+
+    pf = para.paragraph_format
+    pf.left_indent = Cm(1.5)
+    pf.right_indent = Cm(1.5)
+    pf.line_spacing = LINE_SPACING
+
+
+def _add_image_placeholder(doc, description: str) -> None:
+    """Append a centred 10 pt line reading ``[<description>]`` to *doc*.
+
+    The line stands in for an image that is not embedded in the export.
+    """
+    placeholder = f"[{description}]"
+    _add_paragraph(doc, placeholder, font_size=Pt(10),
+                   alignment=WD_ALIGN_PARAGRAPH.CENTER)
+
+
+# ── Main export ───────────────────────────────────────────────────
+
+async def export_decision(case_id: UUID, output_path: str | None = None) -> str:
+    """Export the decision of a case to a DOCX file.
+
+    Args:
+        case_id: Identifier of the case.
+        output_path: Destination file path. When omitted, the file is written
+            to ``<CASES_DIR>/<case_number>/output/החלטה-<case_number>.docx``.
+
+    Returns:
+        Path of the file that was written.
+
+    Raises:
+        ValueError: If the case does not exist, has no decision, or the
+            decision has no blocks.
+    """
+    case = await db.get_case(case_id)
+    if not case:
+        raise ValueError(f"Case {case_id} not found")
+
+    decision = await db.get_decision_by_case(case_id)
+    if not decision:
+        raise ValueError(f"No decision for case {case_id}")
+
+    # str() first: UUID() only parses text, so an id that is already a UUID
+    # instance would otherwise raise instead of passing through.
+    decision_id = UUID(str(decision["id"]))
+
+    # Get blocks
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        blocks = await conn.fetch(
+            """SELECT block_id, block_index, title, content, word_count
+               FROM decision_blocks
+               WHERE decision_id = $1
+               ORDER BY block_index""",
+            decision_id,
+        )
+
+    if not blocks:
+        raise ValueError("No blocks in decision")
+
+    # Create document
+    doc = Document()
+
+    # Set page margins and RTL direction on every section
+    for section in doc.sections:
+        section.top_margin = PAGE_MARGIN
+        section.bottom_margin = PAGE_MARGIN
+        section.left_margin = PAGE_MARGIN
+        section.right_margin = PAGE_MARGIN
+        _set_rtl_section(section)
+
+    # Write blocks in block_index order, skipping blocks with blank content
+    for block in blocks:
+        content = block["content"] or ""
+        if not content.strip():
+            continue
+
+        _write_block_to_docx(doc, block["block_id"], block["title"], content)
+
+    # Determine output path
+    if not output_path:
+        case_dir = config.CASES_DIR / case["case_number"] / "output"
+        case_dir.mkdir(parents=True, exist_ok=True)
+        output_path = str(case_dir / f"החלטה-{case['case_number']}.docx")
+
+    # A caller-supplied path may point into a directory that does not exist yet.
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+    doc.save(output_path)
+    logger.info("DOCX exported: %s", output_path)
+    return output_path
+
+
+def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
+    """Render one decision block into *doc*.
+
+    Header and signature blocks (``block-alef``, ``block-bet``,
+    ``block-gimel``, ``block-yod-bet``) are written as centred lines,
+    ``block-dalet`` as a fixed title, and every other block is parsed line by
+    line into headings, blockquotes, image placeholders and plain
+    paragraphs.  Blank lines are dropped; *title* is not used.
+    """
+    # The title block ignores its content and always prints the same heading.
+    if block_id == "block-dalet":
+        _add_paragraph(doc, "", space_after=Pt(12))  # spacer
+        _add_centered_paragraph(doc, "החלטה", bold=True, font_size=FONT_SIZE_TITLE)
+        _add_paragraph(doc, "", space_after=Pt(12))
+        return
+
+    non_blank = [ln.strip() for ln in content.split("\n") if ln.strip()]
+
+    # block id -> (space after the leading spacer or None, bold, font size)
+    centered_layout = {
+        "block-alef": (None, True, FONT_SIZE_HEADING),
+        "block-bet": (Pt(6), False, FONT_SIZE_BODY),
+        "block-gimel": (Pt(6), False, FONT_SIZE_BODY),
+        "block-yod-bet": (Pt(24), False, FONT_SIZE_BODY),
+    }
+    if block_id in centered_layout:
+        spacer_after, is_bold, size = centered_layout[block_id]
+        if spacer_after is not None:
+            _add_paragraph(doc, "", space_after=spacer_after)
+        for entry in non_blank:
+            # In the parties block a bare "versus" line becomes a bold separator.
+            if block_id == "block-gimel" and entry == "נגד":
+                _add_centered_paragraph(doc, "— נגד —", bold=True, font_size=FONT_SIZE_BODY)
+            else:
+                _add_centered_paragraph(doc, entry, bold=is_bold, font_size=size)
+        return
+
+    # Content blocks: classify each line, first matching rule wins.
+    for entry in non_blank:
+        if _is_section_heading(entry):
+            _add_paragraph(doc, entry, bold=True, font_size=FONT_SIZE_HEADING,
+                           space_after=Pt(6))
+        elif entry.startswith(('"', "״", ">")):
+            # Quoted material: drop the marker and the surrounding quote marks.
+            quoted = entry.lstrip(">").strip().strip('"').strip("״").strip('"')
+            _add_blockquote(doc, quoted)
+        elif "📷" in entry or (entry.startswith("[") and "תמונה" in entry):
+            _add_image_placeholder(doc, entry.strip("[]📷 "))
+        else:
+            # Regular numbered paragraph or plain text
+            _add_paragraph(doc, entry)
+
+
+# Known heading openers, anchored at the start of the line; compiled once.
+_SECTION_HEADING_RE = re.compile(
+    r"^(?:"
+    r"תמצית\s+טענות"
+    r"|טענות\s+העוררי"
+    r"|עמדת\s+הוועדה"
+    r"|עמדת\s+מבקשי"
+    r"|ההליכים\s+בפני"
+    r"|דיון\s+והכרעה"
+    r"|סוף\s+דבר"
+    r"|סיכום"
+    r"|פתח\s+דבר"
+    r"|תכניות\s+חלות"
+    r")"
+)
+
+
+def _is_section_heading(text: str) -> bool:
+    """Return True when *text* starts with one of the known heading phrases.
+
+    Only the fixed phrase list is consulted.  Short lines that do not start
+    with one of the phrases are not treated as headings, numbered or not;
+    the previous length-based branch returned False on both paths and has
+    been removed as dead code.
+    """
+    return _SECTION_HEADING_RE.match(text) is not None