Pre-existing agent updates + analysis DOCX export
Updates accumulated from prior sessions: - HEARTBEAT: company-based filtering (CMP/CMPA) rules - legal-qa, legal-researcher: routine updates - analysis_docx_exporter: new service for analysis DOCX export - compose page: "הורד כ-DOCX" button for analysis - decision_template.docx: template for exporter Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
503
mcp-server/src/legal_mcp/services/analysis_docx_exporter.py
Normal file
503
mcp-server/src/legal_mcp/services/analysis_docx_exporter.py
Normal file
@@ -0,0 +1,503 @@
|
||||
"""Export the legal analysis (analysis-and-research.md + precedents) to a
|
||||
DOCX file that uses דפנה's decision template styles.
|
||||
|
||||
The template lives at `skills/docx/decision_template.docx` (converted once
|
||||
from `טיוטת החלטה.dotx` via `scripts/convert_decision_template.py`).
|
||||
We open it, wipe the sample body paragraphs, and write new content by
|
||||
applying style names only — never by hand-setting font/size/RTL/margins,
|
||||
because the template's styles.xml already carries those.
|
||||
|
||||
Style mapping:
    "Heading 1"       → the document title (case number); the date, when
                        present, follows as its own "Normal" paragraph
|
||||
"Heading 2" → top-level section headers
|
||||
(טענות סף / סוגיות להכרעה / מסקנות)
|
||||
"Normal" + bold → subsection headers (individual claim/issue)
|
||||
"Normal" → field label (bold run) + value
|
||||
"Quote" → precedent quote text
|
||||
"Normal" (italic) → precedent citation
|
||||
|
||||
Output: data/cases/{case_number}/exports/ניתוח-משפטי-v{N}.docx
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from docx import Document
|
||||
from docx.document import Document as DocumentT
|
||||
from docx.oxml.ns import qn
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.text.paragraph import Paragraph
|
||||
from docx.text.run import Run
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import db, research_md
|
||||
|
||||
|
||||
def _mark_run_rtl(run: Run) -> None:
    """Ensure the run's properties contain a ``<w:rtl/>`` element.

    Runs added programmatically carry no ``<w:rtl/>`` flag.  According to
    the original author's notes, Word then renders Hebrew with the style's
    ``ascii`` font slot (Times New Roman) rather than the ``cs`` slot
    (David), even though the paragraph style declares
    ``<w:rFonts cs="David"/>``.

    The element is appended only when it is not already present, so the
    call is safe to repeat on the same run.
    """
    run_props = run._r.get_or_add_rPr()
    if run_props.find(qn("w:rtl")) is not None:
        # Already flagged; avoid a duplicate element.
        return
    run_props.append(OxmlElement("w:rtl"))
|
||||
|
||||
|
||||
def _mark_paragraph_rtl(paragraph: Paragraph) -> None:
    """Ensure the paragraph-mark properties contain ``<w:rtl/>``.

    The flag lives in the ``<w:rPr>`` nested inside the paragraph's
    ``<w:pPr>`` (the run properties of the paragraph mark itself).  The
    original author notes that the paragraph style already sets bidi
    direction, but that empty paragraphs and trailing marks still need
    this flag.

    Both the ``<w:rPr>`` container and the ``<w:rtl/>`` child are created
    only when missing, so the call is safe to repeat.
    """
    para_props = paragraph._p.get_or_add_pPr()

    mark_props = para_props.find(qn("w:rPr"))
    if mark_props is None:
        mark_props = OxmlElement("w:rPr")
        para_props.append(mark_props)

    if mark_props.find(qn("w:rtl")) is not None:
        return
    mark_props.append(OxmlElement("w:rtl"))
|
||||
|
||||
# Path to the converted template. Static — populated by
# scripts/convert_decision_template.py.
# parents[4] climbs four directories above the one holding this file; with
# the file at mcp-server/src/legal_mcp/services/ that is presumably the
# repository root (services → legal_mcp → src → mcp-server → root).
TEMPLATE_PATH = (
    Path(__file__).resolve().parents[4]
    / "skills"
    / "docx"
    / "decision_template.docx"
)

# Label of the chair-position field, and the italic placeholder rendered
# when that field is empty (see _write_subsection).
CHAIR_POSITION_LABEL = "עמדת ועדת הערר"
CHAIR_POSITION_PLACEHOLDER = "[טרם מולאה עמדת ועדת הערר]"

# "1. text" or "1) text" — group 1 is the number, group 2 the text.
NUMBERED_LINE_RE = re.compile(r"^\s*(\d+)[.)]\s+(.+)$")
# A bullet marker (-, •, *, ●, ◦) followed by whitespace — group 1 is the text.
BULLET_LINE_RE = re.compile(r"^\s*[\-\u2022\*\u25CF\u25E6]\s+(.+)$")
# (א) (ב) (ג) ... — Hebrew-letter enumeration used by the authors.
# We keep the marker inside the text (the author wrote it), but render the
# paragraph as "List Paragraph" without the numPr so the visual indentation
# matches the template's list style without adding a double "1." prefix.
# Only a single letter between the parentheses is matched; a multi-letter
# marker such as (יא) is not.
HEB_LETTER_LINE_RE = re.compile(r"^\s*\([א-ת]\)\s+")

# A standalone **LABEL:** line (the whole trimmed line is wrapped in ** **)
# Group 1 is the label without the asterisks or the trailing colon.
STANDALONE_LABEL_RE = re.compile(r"^\s*\*\*([^\n*]+?):\*\*\s*$")
# A short standalone "XYZ:" line (no ** **) — acts as a sub-heading for the
# paragraphs that follow. Limit to short phrases to avoid eating real
# sentences that happen to end with a colon.
# The label must be 2-40 characters long and contain no colon of its own.
PLAIN_LABEL_RE = re.compile(r"^\s*([^\n:]{2,40}):\s*$")
# "**LABEL:** value" inline — bold label followed by prose on the same line.
# Group 1 is the label, group 2 the value.
INLINE_LABEL_RE = re.compile(r"^\s*\*\*([^\n*]+?):\*\*\s+(.+)$")
|
||||
|
||||
|
||||
def _classify_line(line: str) -> tuple[str, str]:
    """Classify one content line and return ``(kind, clean_text)``.

    ``kind`` is one of ``numbered``, ``bullet``, ``heb_letter``,
    ``label_heading``, ``inline_label`` or ``plain``.  Patterns are tried
    in a fixed order and the first match wins: standalone ``**LABEL:**``,
    inline ``**LABEL:** value``, numbered, bullet, Hebrew-letter marker,
    plain ``LABEL:``, and finally ``plain``.

    ``clean_text`` per kind:
      - numbered / bullet: the text with the marker removed
      - heb_letter: the stripped line, marker kept (the author wrote it)
      - label_heading: the label without ``**`` or the trailing colon
      - inline_label: ``"LABEL\\x00VALUE"`` (NUL-separated; the caller
        splits it)
      - plain: the stripped line

    A bullet whose content is itself a ``**LABEL:**`` or
    ``**LABEL:** value`` is reported as ``label_heading`` /
    ``inline_label`` rather than ``bullet``.
    """

    def _heading(match):
        return "label_heading", match.group(1).strip()

    def _inline(match):
        label = match.group(1).strip()
        value = match.group(2).strip()
        return "inline_label", f"{label}\x00{value}"

    standalone = STANDALONE_LABEL_RE.match(line)
    if standalone is not None:
        return _heading(standalone)

    inline = INLINE_LABEL_RE.match(line)
    if inline is not None:
        return _inline(inline)

    numbered = NUMBERED_LINE_RE.match(line)
    if numbered is not None:
        return "numbered", numbered.group(2).strip()

    bullet = BULLET_LINE_RE.match(line)
    if bullet is not None:
        body = bullet.group(1).strip()
        # "- **LABEL:**" is a heading, not a list item.
        nested_heading = STANDALONE_LABEL_RE.match(body)
        if nested_heading is not None:
            return _heading(nested_heading)
        # "- **LABEL:** value" is an inline label.
        nested_inline = INLINE_LABEL_RE.match(body)
        if nested_inline is not None:
            return _inline(nested_inline)
        return "bullet", body

    if HEB_LETTER_LINE_RE.match(line) is not None:
        return "heb_letter", line.strip()

    plain_label = PLAIN_LABEL_RE.match(line)
    if plain_label is not None:
        return _heading(plain_label)

    return "plain", line.strip()
|
||||
|
||||
|
||||
def _strip_numpr(paragraph: Paragraph) -> None:
    """Suppress automatic numbering on ``paragraph``.

    Used when we want the visual styling of `List Paragraph` (font, style
    indent) without Word's auto-decimal "1." prefix — e.g. for Hebrew-
    letter enumeration where the author wrote (א) (ب-style markers) manually.

    Two steps:
      1. Remove any ``<w:numPr>`` set directly on the paragraph.
      2. Add ``<w:numPr><w:numId w:val="0"/></w:numPr>``.  A paragraph
         created with ``add_paragraph(style=...)`` has no direct numPr, so
         step 1 alone changes nothing; numbering that comes from the
         paragraph *style* is only switched off by an explicit numId of 0,
         which WordprocessingML defines as "remove numbering inherited
         from the style hierarchy".
    """
    pPr = paragraph._p.get_or_add_pPr()
    for numPr in pPr.findall(qn("w:numPr")):
        pPr.remove(numPr)

    # get_or_add_numPr() inserts the element at its schema-defined position
    # inside pPr, which a plain append() would not guarantee.
    override = pPr.get_or_add_numPr()
    override.get_or_add_numId().val = 0
|
||||
|
||||
|
||||
# Characters that the code should never emit (user instruction: "no dashes").
|
||||
# Applied only to code-generated text, not to user content from the md file.
|
||||
_CODE_DASH_RE = re.compile(r"[\u2013\u2014]")
|
||||
|
||||
# Markdown inline bold — `**...**`
|
||||
_INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*")
|
||||
|
||||
|
||||
def _no_dash(text: str) -> str:
|
||||
"""Strip em/en dashes from text the code emits (not from source content)."""
|
||||
return _CODE_DASH_RE.sub("", text)
|
||||
|
||||
|
||||
def _add_runs_with_inline_bold(paragraph: Paragraph, text: str) -> None:
    """Append ``text`` to ``paragraph``, turning ``**...**`` spans into bold runs.

    The text is first passed through ``_no_dash``.  It is then cut on the
    inline-bold markers: the text between markers becomes a bold run, the
    text around them becomes plain runs, and empty plain segments are
    skipped.  Every run added is marked RTL.

    This renders ``**important claim**`` as bold (as the author intended)
    instead of leaving literal asterisks in the output.
    """
    # With one capture group, split() alternates plain / bold / plain / ...
    # so odd positions hold the captured bold text.
    segments = _INLINE_BOLD_RE.split(_no_dash(text))
    for position, segment in enumerate(segments):
        if not segment:
            # Only plain segments can be empty (the bold group needs 1+ chars).
            continue
        run = paragraph.add_run(segment)
        if position % 2 == 1:
            run.bold = True
        _mark_run_rtl(run)
|
||||
|
||||
|
||||
def _clear_body(doc: DocumentT) -> None:
    """Delete every paragraph that is a direct child of the document body.

    The template ships with sample paragraphs that must not appear in the
    output.  Only ``<w:p>`` children of the body are removed; the section
    properties (sectPr) are left in place so page size, margins, RTL and
    footer settings remain intact.
    """
    body = doc.element.body
    # findall() returns a list snapshot, so removing while looping is safe.
    sample_paragraphs = body.findall(qn("w:p"))
    for paragraph_el in sample_paragraphs:
        body.remove(paragraph_el)
|
||||
|
||||
|
||||
def _add_paragraph(doc: DocumentT, text: str, style: str) -> Paragraph:
    """Append an RTL paragraph in ``style`` and return it.

    Non-empty ``text`` is added through ``_add_runs_with_inline_bold``;
    empty ``text`` yields a paragraph with no runs.
    """
    paragraph = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(paragraph)
    if not text:
        return paragraph
    _add_runs_with_inline_bold(paragraph, text)
    return paragraph
|
||||
|
||||
|
||||
def _add_label_value(
    doc: DocumentT, label: str, value: str, *, value_italic: bool = False
) -> Paragraph:
    """Append a "Normal" paragraph holding a bold label and an inline value.

    Example rendering: **LABEL:** The party argues that…

    Args:
        doc: Document being written.
        label: Label text; dashes are stripped and ``": "`` is appended.
        value: Text after the label.  An empty value leaves only the label.
        value_italic: When true the value is written as one italic run
            (used for placeholder text) with no inline-bold handling;
            otherwise ``**...**`` spans in the value are rendered bold.

    Returns:
        The paragraph that was added.
    """
    paragraph = doc.add_paragraph(style="Normal")
    _mark_paragraph_rtl(paragraph)

    label_run = paragraph.add_run(f"{_no_dash(label)}: ")
    label_run.bold = True
    _mark_run_rtl(label_run)

    if not value:
        return paragraph

    if not value_italic:
        _add_runs_with_inline_bold(paragraph, value)
        return paragraph

    # Placeholder text: one italic run, asterisks are not interpreted.
    italic_run = paragraph.add_run(_no_dash(value))
    italic_run.italic = True
    _mark_run_rtl(italic_run)
    return paragraph
|
||||
|
||||
|
||||
def _add_multiline_value(
    doc: DocumentT, label: str, value: str
) -> None:
    """Render a field (label + value).

    Blank lines in ``value`` are ignored.  What is written depends on how
    many non-blank lines remain:

    - none: the bold label alone.
    - one: the label and the text inline in a single paragraph — a
      Heading 2 for a one-liner would look inflated.  The line is first
      classified so list markers and ``**`` label wrappers are stripped.
    - several: the label as its own Heading 2 paragraph (so the structure
      visually breaks between fields), then each line as its own paragraph
      routed through `_emit_content_line`.
    """
    lines = [ln for ln in value.splitlines() if ln.strip()]
    if not lines:
        _add_label_value(doc, label, "")
        return

    if len(lines) == 1:
        kind, text = _classify_line(lines[0])
        if kind == "inline_label":
            # _classify_line packs this kind as "LABEL\x00VALUE".  A NUL is
            # not XML-compatible and must never reach a run, so rebuild the
            # line as "**LABEL:** VALUE"; the inline-bold handling in
            # _add_label_value then renders the nested label in bold.
            nested_label, nested_value = text.split("\x00", 1)
            text = f"**{nested_label}:** {nested_value}"
        _add_label_value(doc, label, text)
        return

    # Multi-line: label as Heading 2, then each line via _emit_content_line
    _add_paragraph(doc, label, "Heading 2")
    for line in lines:
        _emit_content_line(doc, line)
|
||||
|
||||
|
||||
def _emit_content_line(doc: DocumentT, line: str) -> None:
    """Write one content line as a paragraph in the matching template style.

    The line is classified with ``_classify_line`` and rendered as:

    - ``label_heading`` (a ``**LABEL:**`` on its own)  → Heading 2
    - ``inline_label`` (``**LABEL:** value``)          → Normal paragraph
      with a bold label, via ``_add_label_value``
    - ``numbered`` ("1. ...")                          → List Paragraph
    - ``heb_letter`` (Hebrew letter in parentheses)    → List Paragraph
      passed through ``_strip_numpr`` (the author supplied the marker)
    - ``bullet`` ("- ...", marker stripped)            → Normal
    - ``plain``                                        → Normal
    """
    kind, text = _classify_line(line)

    if kind == "label_heading":
        _add_paragraph(doc, text, "Heading 2")
        return

    if kind == "inline_label":
        label, value = text.split("\x00", 1)
        _add_label_value(doc, label, value)
        return

    uses_list_style = kind in ("numbered", "heb_letter")
    para = doc.add_paragraph(
        style="List Paragraph" if uses_list_style else "Normal"
    )
    if kind == "heb_letter":
        _strip_numpr(para)

    _mark_paragraph_rtl(para)
    _add_runs_with_inline_bold(para, text)
|
||||
|
||||
|
||||
def _format_subsection_title(item: dict[str, Any], kind_label: str) -> str:
|
||||
"""Return '{kind_label} {number}: {title}' e.g. 'טענת סף 1: חוסר סמכות'."""
|
||||
number = item.get("number") or ""
|
||||
title = item.get("title", "").strip()
|
||||
if number and title:
|
||||
return f"{kind_label} {number}: {title}"
|
||||
if title:
|
||||
return title
|
||||
return f"{kind_label} {number}".strip()
|
||||
|
||||
|
||||
def _write_subsection(
    doc: DocumentT,
    item: dict[str, Any],
    precedents_for_item: list[dict[str, Any]],
    kind_label: str,
) -> None:
    """Write one claim/issue subsection: header, fields, chair position, precedents.

    Args:
        doc: Document being written.
        item: Parsed subsection; the ``fields`` list (entries with
            ``label`` and ``content``) and ``chair_position`` are read
            here, ``number`` / ``title`` by ``_format_subsection_title``.
        precedents_for_item: Precedents attached to this subsection; each
            may carry a ``quote`` and a ``citation``.
        kind_label: Label used in the subsection header.
    """

    def _bold_normal(text: str) -> None:
        # A bold "Normal" paragraph rather than a Heading, so it visually
        # sits under the section's Heading 2.
        para = doc.add_paragraph(style="Normal")
        _mark_paragraph_rtl(para)
        bold_run = para.add_run(text)
        bold_run.bold = True
        _mark_run_rtl(bold_run)

    _bold_normal(_no_dash(_format_subsection_title(item, kind_label)))

    # Regular fields (party positions, legal questions, etc.); entries
    # without a label are skipped.
    for entry in item.get("fields", []):
        field_label = entry.get("label", "").strip()
        field_content = entry.get("content", "").strip()
        if field_label:
            _add_multiline_value(doc, field_label, field_content)

    # The chair position is always rendered; an empty one gets an italic
    # placeholder instead.
    chair_text = (item.get("chair_position") or "").strip()
    if not chair_text:
        _add_label_value(
            doc, CHAIR_POSITION_LABEL, CHAIR_POSITION_PLACEHOLDER,
            value_italic=True,
        )
    else:
        _add_multiline_value(doc, CHAIR_POSITION_LABEL, chair_text)

    if not precedents_for_item:
        return

    # Precedents attached to this subsection: quote in the "Quote" style,
    # followed by the citation in italics.
    _bold_normal("פסיקה רלוונטית:")
    for precedent in precedents_for_item:
        quote_text = (precedent.get("quote") or "").strip()
        citation_text = (precedent.get("citation") or "").strip()
        if quote_text:
            _add_paragraph(doc, quote_text, "Quote")
        if not citation_text:
            continue
        citation_para = doc.add_paragraph(style="Normal")
        _mark_paragraph_rtl(citation_para)
        citation_run = citation_para.add_run(_no_dash(citation_text))
        citation_run.italic = True
        _mark_run_rtl(citation_run)
|
||||
|
||||
|
||||
def _add_background_section(
    doc: DocumentT, title: str, body: str | None
) -> None:
    """Write a background section: a Heading 2 title followed by its prose.

    Nothing is written when ``body`` is ``None``, empty or whitespace-only.
    Otherwise every non-blank line goes through ``_emit_content_line`` so
    bullets, ``**labels:**`` and parenthesised Hebrew-letter enumerations
    all get the template styles.
    """
    content_lines = [ln for ln in (body or "").splitlines() if ln.strip()]
    if not content_lines:
        return

    _add_paragraph(doc, title, "Heading 2")
    for content_line in content_lines:
        _emit_content_line(doc, content_line)
|
||||
|
||||
|
||||
def _group_precedents(
|
||||
precedents: list[dict[str, Any]],
|
||||
) -> tuple[list[dict], dict[str, list[dict]]]:
|
||||
"""Split the flat precedent list into case-level and per-section maps.
|
||||
|
||||
Returns (case_level_precedents, {section_id: [precedents]}).
|
||||
"""
|
||||
case_level: list[dict] = []
|
||||
by_section: dict[str, list[dict]] = {}
|
||||
for p in precedents:
|
||||
sid = p.get("section_id")
|
||||
if sid is None:
|
||||
case_level.append(p)
|
||||
else:
|
||||
by_section.setdefault(sid, []).append(p)
|
||||
return case_level, by_section
|
||||
|
||||
|
||||
def _next_version(export_dir: Path) -> int:
|
||||
"""Return the next version number for ניתוח-משפטי-v{N}.docx."""
|
||||
existing = sorted(export_dir.glob("ניתוח-משפטי-v*.docx"))
|
||||
next_ver = 1
|
||||
for p in existing:
|
||||
try:
|
||||
ver = int(p.stem.split("-v")[1])
|
||||
except (IndexError, ValueError):
|
||||
continue
|
||||
next_ver = max(next_ver, ver + 1)
|
||||
return next_ver
|
||||
|
||||
|
||||
async def build_analysis_docx(case_number: str) -> Path:
    """Build a DOCX of the legal analysis for a case using the template
    styles, and save a versioned copy under the case's exports folder.

    The template is opened, its sample body paragraphs are removed, and the
    parsed ``analysis-and-research.md`` is written in this order: title
    (and date, if any), background sections, case-level precedents,
    threshold claims, issues, conclusions.

    Args:
        case_number: Case identifier; used to locate the case directory,
            to look the case up in the database, and in the title.

    Returns:
        Path of the saved file, ``<case_dir>/exports/ניתוח-משפטי-v{N}.docx``.

    Raises:
        FileNotFoundError: If no analysis file or template exists.
    """
    if not TEMPLATE_PATH.exists():
        raise FileNotFoundError(
            f"Template not found at {TEMPLATE_PATH}. "
            "Run: python scripts/convert_decision_template.py"
        )

    case_dir = config.find_case_dir(case_number)
    analysis_path = case_dir / "documents" / "research" / "analysis-and-research.md"
    if not analysis_path.exists():
        raise FileNotFoundError(
            f"Analysis file not found for case {case_number}"
        )

    parsed = research_md.parse(analysis_path)

    # Resolve case_id so we can fetch precedents. Missing case → proceed
    # without precedents rather than failing the export.
    case_level_precedents: list[dict] = []
    precedents_by_section: dict[str, list[dict]] = {}
    case = await db.get_case_by_number(case_number)
    if case:
        # str() first: UUID() only accepts text, so this works whether the
        # row carries the id as a string or as a UUID instance.
        precedents = await db.list_case_precedents(UUID(str(case["id"])))
        case_level_precedents, precedents_by_section = _group_precedents(precedents)

    doc = Document(str(TEMPLATE_PATH))
    _clear_body(doc)

    # Document title. `or` fallbacks (rather than .get defaults) so an
    # explicit None header/date is treated like a missing one, as is
    # already done for conclusions and precedent fields below.
    header = parsed.get("header") or {}
    date = (header.get("date") or "").strip()
    title_text = f"ניתוח משפטי וכתיבת עמדה בערר {case_number}"
    _add_paragraph(doc, title_text, "Heading 1")
    if date:
        p_date = doc.add_paragraph(style="Normal")
        _mark_paragraph_rtl(p_date)
        run_date = p_date.add_run(f"תאריך: {date}")
        _mark_run_rtl(run_date)

    # Background sections — printed first so the reader gets context
    # before any claims/precedents. These come only in the exported DOCX,
    # not in the web UI (the UI renders them elsewhere).
    _add_background_section(doc, "רקע לניתוח", parsed.get("represented_party"))
    _add_background_section(doc, "רקע דיוני", parsed.get("procedural_background"))
    _add_background_section(doc, "עובדות מוסכמות", parsed.get("agreed_facts"))
    _add_background_section(
        doc, "עובדות שנויות במחלוקת", parsed.get("disputed_facts")
    )

    # Case-level precedents appear at the top (they cut across claims/issues)
    if case_level_precedents:
        _add_paragraph(doc, "פסיקה כללית", "Heading 2")
        for prec in case_level_precedents:
            quote = (prec.get("quote") or "").strip()
            citation = (prec.get("citation") or "").strip()
            if quote:
                _add_paragraph(doc, quote, "Quote")
            if citation:
                cp = doc.add_paragraph(style="Normal")
                _mark_paragraph_rtl(cp)
                cr = cp.add_run(_no_dash(citation))
                cr.italic = True
                _mark_run_rtl(cr)

    # Threshold claims. An item without an "id" simply gets no precedents
    # instead of aborting the whole export with a KeyError.
    threshold_claims = parsed.get("threshold_claims") or []
    if threshold_claims:
        _add_paragraph(doc, "טענות סף", "Heading 2")
        for tc in threshold_claims:
            _write_subsection(
                doc, tc, precedents_by_section.get(tc.get("id"), []), "טענת סף"
            )

    # Issues
    issues = parsed.get("issues") or []
    if issues:
        _add_paragraph(doc, "סוגיות להכרעה", "Heading 2")
        for iss in issues:
            _write_subsection(
                doc, iss, precedents_by_section.get(iss.get("id"), []), "סוגיה"
            )

    # Conclusions
    conclusions = (parsed.get("conclusions") or "").strip()
    if conclusions:
        _add_paragraph(doc, "מסקנות", "Heading 2")
        for raw in conclusions.splitlines():
            if not raw.strip():
                continue
            _emit_content_line(doc, raw)

    # Save versioned
    export_dir = case_dir / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)
    version = _next_version(export_dir)
    out_path = export_dir / f"ניתוח-משפטי-v{version}.docx"
    doc.save(str(out_path))
    return out_path
|
||||
Reference in New Issue
Block a user