fix(style-acq T9): מספור-אוטומטי אמיתי בייצוא DOCX (היה ללא מספור)

באג: ה-exporter הסיר את הקידומת "N." והחיל סגנון "List Paragraph" — שאין לו numPr בתבנית (אין numbering.xml) → ההחלטות יצאו **ללא מספור** כלל.

- docx_exporter._ensure_decision_numbering: מזריק abstractNum עשרוני (RTL, lvlJc=right) + num לחלק-המספור פעם אחת; _apply_list_numbering מחבר כל פסקת-גוף לרשימה הרציפה. מספור Word אמיתי — מתעדכן בעריכה, copy/paste נקי. אומת מבנית: numId יחיד, decimal, שתי פסקאות→אותו numId, docx נשמר.
- התאמת ANTI_PATTERNS (T7): הוסר manual_paragraph_numbers — "N." בתחילת-שורה הוא ה-signal הנדרש לייצוא, לא אנטי-דפוס. נשאר inline (1)..(2)/markdown/bullets.
- voice-fingerprint §3.1: תוקן — הכותב כן מקדים "N. " בתחילת-שורה (signal), הייצוא ממיר ל-auto-numbering. סתירה קודמת ("אל תקליד מספרים") יושבה.

⚠️ אימות-מבנה עבר; אימות ויזואלי ב-Word מומלץ על ייצוא ראשון. G11.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 19:23:29 +00:00
parent f20a3a09fd
commit 3c68383e86
3 changed files with 91 additions and 12 deletions
--- a/mcp-server/src/legal_mcp/services/docx_exporter.py
+++ b/mcp-server/src/legal_mcp/services/docx_exporter.py
@@ -112,6 +112,84 @@ def _suppress_paragraph_numbering(paragraph) -> None:
    pPr.append(numPr)


+def _ensure_decision_numbering(doc) -> int:
+    """T9 — define one continuous decimal list (once per document); return its numId.
+
+    Why: numbered body paragraphs have their literal "N." prefix stripped and get the
+    "List Paragraph" style, which (per the original T9 note) carries no numPr in the
+    template, so output was unnumbered. This injects one abstractNum (level 0:
+    decimal, "%1.", lvlJc=right, start=1) + one num referencing it, using the next
+    free numeric ids (1 if none). The numId is cached on ``doc._decision_num_id`` so
+    later calls return it without adding definitions; paragraphs then reference it.
+    """
+    cached = getattr(doc, "_decision_num_id", None)  # set by an earlier call on this doc
+    if cached is not None:
+        return cached
+
+    numbering = doc.part.numbering_part.element  # root <w:numbering> element
+
+    def _next_id(tag: str, attr: str) -> int:
+        ids = [int(el.get(qn(attr))) for el in numbering.findall(qn(tag))
+               if el.get(qn(attr)) and el.get(qn(attr)).isdigit()]
+        return (max(ids) + 1) if ids else 1  # highest existing id + 1, or 1 if none
+
+    abstract_id = _next_id("w:abstractNum", "w:abstractNumId")
+    num_id = _next_id("w:num", "w:numId")
+
+    abstract = OxmlElement("w:abstractNum")
+    abstract.set(qn("w:abstractNumId"), str(abstract_id))
+    mlt = OxmlElement("w:multiLevelType")
+    mlt.set(qn("w:val"), "singleLevel")
+    abstract.append(mlt)
+    lvl = OxmlElement("w:lvl")  # the only level defined (ilvl 0)
+    lvl.set(qn("w:ilvl"), "0")
+    for tag, val in (("w:start", "1"), ("w:numFmt", "decimal"),
+                     ("w:lvlText", "%1."), ("w:lvlJc", "right")):
+        el = OxmlElement(tag)
+        el.set(qn("w:val"), val)
+        lvl.append(el)
+    lvl_ppr = OxmlElement("w:pPr")  # level paragraph properties: indent only
+    ind = OxmlElement("w:ind")
+    ind.set(qn("w:start"), "720")
+    ind.set(qn("w:hanging"), "360")
+    lvl_ppr.append(ind)
+    lvl.append(lvl_ppr)
+    abstract.append(lvl)
+
+    num = OxmlElement("w:num")  # concrete list instance pointing at the abstractNum
+    num.set(qn("w:numId"), str(num_id))
+    anum_ref = OxmlElement("w:abstractNumId")
+    anum_ref.set(qn("w:val"), str(abstract_id))
+    num.append(anum_ref)
+
+    # abstractNum elements must precede num elements in <w:numbering>.
+    last_abstract = numbering.findall(qn("w:abstractNum"))
+    if last_abstract:
+        last_abstract[-1].addnext(abstract)
+    else:
+        numbering.insert(0, abstract)  # NOTE(review): lands before any w:numPicBullet — confirm none exist
+    numbering.append(num)  # NOTE(review): lands after any w:numIdMacAtCleanup — confirm none exist
+
+    doc._decision_num_id = num_id  # cache read by the getattr() check at the top
+    return num_id
+
+
+def _apply_list_numbering(paragraph, num_id: int) -> None:
+    """Point ``paragraph`` at list ``num_id`` (ilvl 0), replacing any existing w:numPr."""
+    pPr = paragraph._p.get_or_add_pPr()
+    existing = pPr.find(qn("w:numPr"))
+    if existing is not None:
+        pPr.remove(existing)  # drop the old reference so only one numPr remains
+    numPr = OxmlElement("w:numPr")
+    ilvl = OxmlElement("w:ilvl")
+    ilvl.set(qn("w:val"), "0")
+    nid = OxmlElement("w:numId")
+    nid.set(qn("w:val"), str(num_id))
+    numPr.append(ilvl)  # w:ilvl is appended before w:numId
+    numPr.append(nid)
+    pPr.append(numPr)
+
+
 def _clear_body(doc) -> None:
    """Remove all paragraphs in the document body while keeping sectPr.

@@ -485,12 +563,15 @@ def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
            _add_image_placeholder(doc, stripped.strip("[]📷 "))
            continue

-        # Numbered body paragraph ("1. text") → List Paragraph with auto-num.
-        # The literal prefix is dropped; Word renders "1. 2. 3. ..." via numId.
+        # Numbered body paragraph ("1. text") → real Word auto-numbering (T9).
+        # The literal prefix is dropped and a numPr referencing the document's
+        # continuous decimal list is attached, so Word renders "1. 2. 3. ..."
+        # itself (renumbers on edit, copy-pastes without stray digits).
        num_match = _NUM_PREFIX_RE.match(stripped)
        if num_match:
            body_text = num_match.group(2).strip()
-            _add_styled_paragraph(doc, body_text, style="List Paragraph")
+            para = _add_styled_paragraph(doc, body_text, style="List Paragraph")
+            _apply_list_numbering(para, _ensure_decision_numbering(doc))
            continue

        _add_styled_paragraph(doc, stripped, style="Normal")
--- a/mcp-server/src/legal_mcp/services/lessons.py
+++ b/mcp-server/src/legal_mcp/services/lessons.py
@@ -43,13 +43,11 @@ GOLDEN_RATIOS: dict[str, dict[str, tuple[int, int]]] = {
 }

 # ── Anti-patterns (what Dafna avoids) — detectable signals for style-distance (T7) ──
-# Derived from daphna-voice-fingerprint.md §3 (corrected 2026-06-06: sequential
-# paragraph numbering is REQUIRED — applied as Word auto-numbering at export — so the
-# anti-pattern is MANUAL numbers typed as text, not numbering itself).
+# Derived from daphna-voice-fingerprint.md §3 (corrected 2026-06-06). NOTE: a leading
+# "N." per paragraph is NOT an anti-pattern — it is the REQUIRED signal the DOCX
+# exporter converts to real Word auto-numbering (docx_exporter._ensure_decision_numbering).
+# The real anti-patterns are mid-paragraph mini-lists, markdown, and bullets.
 ANTI_PATTERNS: list[dict] = [
-    {"name": "manual_paragraph_numbers",
-     "regex": r"(?m)^\s*\d{1,3}\.\s",
-     "note": "מספרים ידניים כטקסט בראש פסקה — אמורים להיות auto-numbering בייצוא"},
    {"name": "inline_numbered_fragments",
     "regex": r"\([0-9]\)[^\n]{0,200}\([0-9]\)",
     "note": "פיצול טיעון לרשימת-מיני (1)...(2) בתוך פסקת-אנליזה"},