fix(style-acq T9): מספור-אוטומטי אמיתי בייצוא DOCX (היה ללא מספור)

באג: ה-exporter הסיר את הקידומת "N." והחיל סגנון "List Paragraph" — שאין לו
numPr בתבנית (אין numbering.xml) → ההחלטות יצאו **ללא מספור** כלל.

- docx_exporter._ensure_decision_numbering: מזריק abstractNum עשרוני (RTL,
  lvlJc=right) + num לחלק-המספור פעם אחת; _apply_list_numbering מחבר כל
  פסקת-גוף לרשימה הרציפה. מספור Word אמיתי — מתעדכן בעריכה, copy/paste נקי.
  אומת מבנית: numId יחיד, decimal, שתי פסקאות→אותו numId, docx נשמר.
- התאמת ANTI_PATTERNS (T7): הוסר manual_paragraph_numbers — "N." בתחילת-שורה
  הוא ה-signal הנדרש לייצוא, לא אנטי-דפוס. נשאר inline (1)..(2)/markdown/bullets.
- voice-fingerprint §3.1: תוקן — הכותב כן מקדים "N. " בתחילת-שורה (signal),
  הייצוא ממיר ל-auto-numbering. סתירה קודמת ("אל תקליד מספרים") יושבה.

⚠️ אימות-מבנה עבר; אימות ויזואלי ב-Word מומלץ על ייצוא ראשון. G11.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-06 19:23:29 +00:00
parent f20a3a09fd
commit 3c68383e86
3 changed files with 91 additions and 12 deletions

View File

@@ -112,6 +112,84 @@ def _suppress_paragraph_numbering(paragraph) -> None:
pPr.append(numPr)
def _ensure_decision_numbering(doc) -> int:
    """T9 — define a single continuous decimal list (RTL) and return its numId.

    Dafna's decisions are ALWAYS sequentially numbered (1. 2. 3. ...). The
    template ships no numbering definition, so previously the body paragraphs
    were stripped of their manual "N." prefix and styled "List Paragraph" —
    which carries NO numPr, yielding UNNUMBERED output. Here we inject one
    decimal abstractNum + num into the numbering part once per document; body
    paragraphs then reference it (real Word auto-numbering → renumbers
    automatically, copy-pastes cleanly).

    Args:
        doc: Document object exposing ``part.numbering_part``. The resulting
            numId is cached on it as ``_decision_num_id`` so the definition
            is injected only once per document object.

    Returns:
        The ``w:numId`` of the decision list, for use in paragraph ``w:numPr``.
    """
    cached = getattr(doc, "_decision_num_id", None)
    if cached is not None:
        return cached

    # NOTE(review): python-docx's NumberingPart.new() appears to be
    # unimplemented, so this presumably requires the template to already
    # carry a numbering part (even an empty one) — confirm.
    numbering = doc.part.numbering_part.element  # <w:numbering>

    def _next_id(tag: str, attr: str) -> int:
        """Return one more than the highest numeric *attr* among *tag* children (1 if none)."""
        attr_name = qn(attr)
        raw_values = (el.get(attr_name) for el in numbering.findall(qn(tag)))
        used = [int(value) for value in raw_values if value and value.isdigit()]
        return max(used, default=0) + 1

    abstract_id = _next_id("w:abstractNum", "w:abstractNumId")
    num_id = _next_id("w:num", "w:numId")

    # Abstract definition: one level, decimal "1." labels, right-justified.
    abstract = OxmlElement("w:abstractNum")
    abstract.set(qn("w:abstractNumId"), str(abstract_id))
    mlt = OxmlElement("w:multiLevelType")
    mlt.set(qn("w:val"), "singleLevel")
    abstract.append(mlt)

    lvl = OxmlElement("w:lvl")
    lvl.set(qn("w:ilvl"), "0")
    # Child order matters inside <w:lvl>: start, numFmt, lvlText, lvlJc, pPr.
    for tag, val in (("w:start", "1"), ("w:numFmt", "decimal"),
                     ("w:lvlText", "%1."), ("w:lvlJc", "right")):
        el = OxmlElement(tag)
        el.set(qn("w:val"), val)
        lvl.append(el)
    lvl_ppr = OxmlElement("w:pPr")
    ind = OxmlElement("w:ind")
    ind.set(qn("w:start"), "720")
    ind.set(qn("w:hanging"), "360")
    lvl_ppr.append(ind)
    lvl.append(lvl_ppr)
    abstract.append(lvl)

    # Concrete list instance pointing at the abstract definition.
    num = OxmlElement("w:num")
    num.set(qn("w:numId"), str(num_id))
    anum_ref = OxmlElement("w:abstractNumId")
    anum_ref.set(qn("w:val"), str(abstract_id))
    num.append(anum_ref)

    # <w:numbering> children are ordered: numPicBullet*, abstractNum*, num*,
    # numIdMacAtCleanup?. Insert relative to the successors instead of using
    # insert(0)/append(), so the new elements never land before a
    # numPicBullet or after a trailing numIdMacAtCleanup.
    numbering.insert_element_before(abstract, "w:num", "w:numIdMacAtCleanup")
    numbering.insert_element_before(num, "w:numIdMacAtCleanup")

    doc._decision_num_id = num_id
    return num_id
def _apply_list_numbering(paragraph, num_id: int) -> None:
    """Attach paragraph to the continuous decision list (real auto-numbering).

    Any ``w:numPr`` already present on the paragraph is discarded and replaced
    by one referencing level 0 of the list identified by *num_id*.

    Args:
        paragraph: Paragraph wrapper whose ``_p`` element receives the numPr.
        num_id: The ``w:numId`` of the list to join.
    """
    pPr = paragraph._p.get_or_add_pPr()

    # Drop every stale numPr so exactly one remains after this call.
    for stale in pPr.findall(qn("w:numPr")):
        pPr.remove(stale)

    # <w:pPr> children have a fixed schema order and numPr sits near the
    # front (before bidi/spacing/ind/jc/rPr ...). get_or_add_numPr() inserts
    # the new element at its schema position; a plain append() would place it
    # after any formatting children already set on the paragraph.
    numPr = pPr.get_or_add_numPr()
    for tag, val in (("w:ilvl", "0"), ("w:numId", str(num_id))):
        child = OxmlElement(tag)
        child.set(qn("w:val"), val)
        numPr.append(child)
def _clear_body(doc) -> None:
"""Remove all paragraphs in the document body while keeping sectPr.
@@ -485,12 +563,15 @@ def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
_add_image_placeholder(doc, stripped.strip("[]📷 "))
continue
# Numbered body paragraph ("1. text") → List Paragraph with auto-num.
# The literal prefix is dropped; Word renders "1. 2. 3. ..." via numId.
# Numbered body paragraph ("1. text") → real Word auto-numbering (T9).
# The literal prefix is dropped and a numPr referencing the document's
# continuous decimal list is attached, so Word renders "1. 2. 3. ..."
# itself (renumbers on edit, copy-pastes without stray digits).
num_match = _NUM_PREFIX_RE.match(stripped)
if num_match:
body_text = num_match.group(2).strip()
_add_styled_paragraph(doc, body_text, style="List Paragraph")
para = _add_styled_paragraph(doc, body_text, style="List Paragraph")
_apply_list_numbering(para, _ensure_decision_numbering(doc))
continue
_add_styled_paragraph(doc, stripped, style="Normal")

View File

@@ -43,13 +43,11 @@ GOLDEN_RATIOS: dict[str, dict[str, tuple[int, int]]] = {
}
# ── Anti-patterns (what Dafna avoids) — detectable signals for style-distance (T7) ──
# Derived from daphna-voice-fingerprint.md §3 (corrected 2026-06-06: sequential
# paragraph numbering is REQUIRED — applied as Word auto-numbering at export — so the
# anti-pattern is MANUAL numbers typed as text, not numbering itself).
# Derived from daphna-voice-fingerprint.md §3 (corrected 2026-06-06). NOTE: a leading
# "N." per paragraph is NOT an anti-pattern — it is the REQUIRED signal the DOCX
# exporter converts to real Word auto-numbering (docx_exporter._ensure_decision_numbering).
# The real anti-patterns are mid-paragraph mini-lists, markdown, and bullets.
ANTI_PATTERNS: list[dict] = [
{"name": "manual_paragraph_numbers",
"regex": r"(?m)^\s*\d{1,3}\.\s",
"note": "מספרים ידניים כטקסט בראש פסקה — אמורים להיות auto-numbering בייצוא"},
{"name": "inline_numbered_fragments",
"regex": r"\([0-9]\)[^\n]{0,200}\([0-9]\)",
"note": "פיצול טיעון לרשימת-מיני (1)...(2) בתוך פסקת-אנליזה"},