From 726498126dc1373a844745572147d8bbbed6d3c5 Mon Sep 17 00:00:00 2001 From: Chaim Date: Thu, 16 Apr 2026 18:49:30 +0000 Subject: [PATCH] Add Track Changes architecture for draft revisions (CMP + CMPA) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were orphaned on disk while exports kept rebuilding from stale DB blocks. New architecture: - User-uploaded DOCX becomes the source of truth (cases.active_draft_path) - System edits via XML surgery with real Word / revisions - User can Accept/Reject each change from within Word Components: - docx_reviser.py: XML surgery for Track Changes (15 tests) - docx_retrofit.py: retroactive bookmark injection with Hebrew marker detection + heading heuristic (9 tests) - docx_exporter.py: emits bookmarks around each of the 12 blocks - 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft - 4 new/updated endpoints: upload (auto-registers active draft), /exports/revise, /exports/bookmarks, /exports/{filename}/retrofit, /active-draft - DB migration: cases.active_draft_path column - UI: correct banner using real v-numbers, "מקור האמת" badge, detailed upload toast with bookmarks_added/missing_blocks - agents: legal-exporter (3 export modes), legal-ceo (stage G for revision handling), legal-writer (revision mode) Multi-tenancy: - Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases) - New revise-draft skill added to both companies - deploy-track-changes.sh syncs skills CMP ↔ CMPA - retrofit_case.py: one-off retrofit of existing files Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e) Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/agents/legal-ceo.md | 44 ++ .claude/agents/legal-exporter.md | 46 +- .claude/agents/legal-writer.md | 22 + mcp-server/src/legal_mcp/server.py | 19 + mcp-server/src/legal_mcp/services/db.py | 24 + .../src/legal_mcp/services/docx_exporter.py | 63 ++- 
.../src/legal_mcp/services/docx_retrofit.py | 290 ++++++++++ .../src/legal_mcp/services/docx_reviser.py | 514 ++++++++++++++++++ mcp-server/src/legal_mcp/tools/drafting.py | 163 ++++++ mcp-server/tests/__init__.py | 0 .../tests/test_docx_exporter_bookmarks.py | 103 ++++ mcp-server/tests/test_docx_retrofit.py | 141 +++++ mcp-server/tests/test_docx_reviser.py | 342 ++++++++++++ mcp-server/tests/test_track_changes_e2e.py | 237 ++++++++ scripts/SCRIPTS.md | 3 + scripts/deploy-track-changes.sh | 86 +++ scripts/retrofit_case.py | 84 +++ web-ui/src/components/cases/drafts-panel.tsx | 55 +- web-ui/src/lib/api/exports.ts | 106 +++- web/app.py | 100 +++- 20 files changed, 2419 insertions(+), 23 deletions(-) create mode 100644 mcp-server/src/legal_mcp/services/docx_retrofit.py create mode 100644 mcp-server/src/legal_mcp/services/docx_reviser.py create mode 100644 mcp-server/tests/__init__.py create mode 100644 mcp-server/tests/test_docx_exporter_bookmarks.py create mode 100644 mcp-server/tests/test_docx_retrofit.py create mode 100644 mcp-server/tests/test_docx_reviser.py create mode 100644 mcp-server/tests/test_track_changes_e2e.py create mode 100755 scripts/deploy-track-changes.sh create mode 100755 scripts/retrofit_case.py diff --git a/.claude/agents/legal-ceo.md b/.claude/agents/legal-ceo.md index 2a36952..2f10695 100644 --- a/.claude/agents/legal-ceo.md +++ b/.claude/agents/legal-ceo.md @@ -25,6 +25,9 @@ tools: - mcp__legal-ai__brainstorm_directions - mcp__legal-ai__validate_decision - mcp__legal-ai__export_docx + - mcp__legal-ai__apply_user_edit + - mcp__legal-ai__list_bookmarks + - mcp__legal-ai__revise_draft --- # עוזר משפטי — מנהל תהליך כתיבת החלטות @@ -363,6 +366,47 @@ python3 /home/chaim/legal-ai/scripts/notify.py \ **מתי לחזור אחורה:** אם דוח QA מצביע על בעיה מתודולוגית (סילוגיזם חסר, כיוון לא תואם chair_directions) — חזור לשלב C/D ולא רק לכותב. 
+### שלב G: טיפול בעריכה מהמשתמש (אחרי ייצוא) + +**מתי:** המשתמש העלה `עריכה-v*.docx` (אחרי שייצאנו `טיוטה-v*.docx` קודמת) וכתב תגובה בקומנט. + +**מטרה:** המשתמש ערך את הטיוטה ב-Word ושמר כ-`עריכה-v*.docx`. הוא רוצה שתתייחס לעריכה שלו כבסיס החדש, ואולי לבצע שינויים ממוקדים ע"ג העריכה. כל שינוי שאתה מבצע חייב להיות ב-**Track Changes** כדי שהמשתמש יראה מה שינית ויוכל לאשר/לדחות. + +**תהליך:** + +1. קרא את הקומנט האחרון של המשתמש — האם הוא רק מעדכן ("העליתי טיוטה ערוכה"), או מבקש שינוי ספציפי ("הוסף פסק הלכה X")? + +2. הרץ `apply_user_edit(case_number, "עריכה-v{N}.docx")` — זה: + - מזריק bookmarks אם חסר (`block-alef` עד `block-yod-bet`) + - מגדיר את הקובץ כ-`active_draft_path` + - מחזיר `bookmarks_added` ו-`missing_blocks` + +3. אם המשתמש רק עדכן (לא ביקש שינוי): + - דווח בקומנט: "העריכה נקלטה. זיהיתי N בלוקים. אם יש שינויים שתרצה שאבצע — שלח אותם כהוראה." + - **אל תייצר `טיוטה-v{N+1}.docx` חדשה** + +4. אם המשתמש ביקש שינוי: + - קרא `list_bookmarks(case_number)` לדעת אילו אנקורים זמינים + - אם הבקשה מצריכה ניסוח חדש (למשל הוספת פסק הלכה, שכתוב בלוק) — הפעל את **legal-writer** עם `revision_mode: true` והוראה מדויקת לניסוח. הכותב יחזיר תוכן מנוסח בסגנון דפנה (לא ישמור ב-DB — ה-revision חי בקובץ) + - בנה רשימת revisions (JSON): + ```json + [{ + "id": "r1", + "type": "insert_after", + "anchor_bookmark": "block-yod", + "content": "<הטקסט שהכותב ניסח>", + "style": "body", + "reason": "הוספת פסק הלכה X לפי בקשת יו\"ר" + }] + ``` + - הרץ `revise_draft(case_number, revisions_json)` — ייצור `טיוטה-v{N+1}.docx` עם Track Changes + - פרסם comment: "טיוטה מעודכנת: `טיוטה-v{N+1}.docx`. השינויים מסומנים כ-Track Changes — פתח ב-Word ואשר/דחה." + +**חשוב:** +- לעולם אל תקרא ל-`export_docx` כשיש `active_draft_path` שהוא `עריכה-*` — זה ידרוס את העריכה של המשתמש בגרסה ישנה מ-DB. +- השתמש ב-`revise_draft` בלבד במצב ג'. +- אם המשתמש ביקש שינוי מאסיבי (שכתוב מלא של בלוק) — עדיף להציע לו לעבוד על זה בעריכה נוספת מצדו ולא לייצר revisions ארוכים. 
+ ## מפת סטטוסים **סטטוסים של התיק (`cases.status`) — כל סטטוס מתאים לפעולה אחת בדיוק:** diff --git a/.claude/agents/legal-exporter.md b/.claude/agents/legal-exporter.md index 0c76093..23666bb 100644 --- a/.claude/agents/legal-exporter.md +++ b/.claude/agents/legal-exporter.md @@ -14,6 +14,9 @@ tools: - mcp__legal-ai__get_block_context - mcp__legal-ai__workflow_status - mcp__legal-ai__export_docx + - mcp__legal-ai__apply_user_edit + - mcp__legal-ai__list_bookmarks + - mcp__legal-ai__revise_draft - mcp__legal-ai__get_style_guide - mcp__legal-ai__validate_decision --- @@ -26,6 +29,14 @@ tools: עבוד תמיד בעברית. +## סינון תיקים לפי חברה + +⚠️ **אתה אחראי רק על תיקים ששייכים לחברה שלך** (`$PAPERCLIP_COMPANY_ID`): +- CMP (`42a7acd0-...`) → רק תיקים **1xxx** (רישוי ובניה) +- CMPA (`8639e837-...`) → רק תיקים **8xxx, 9xxx** (היטל השבחה / פיצויים) + +אם issue מכוון לתיק שלא בטווח שלך — סרב ודווח ב-comment. + ## סקייל ייצוא **חובה לקרוא לפני כל ייצוא:** @@ -45,6 +56,16 @@ tools: 2. קרא פרטי תיק (`case_get`) 3. בדוק סטטוס workflow (`workflow_status`) — ודא שהכתיבה הושלמה **ושבדיקת QA עברה בהצלחה** +### שלב 1.5: זיהוי active_draft ועריכות ממתינות + +1. בדוק אם ב-`data/cases/{case_number}/exports/` יש קבצי `עריכה-v*.docx` (עלו ע"י המשתמש) +2. אם כן — הפעל `apply_user_edit` עם שם הקובץ האחרון; הכלי יזריק bookmarks ויגדיר את הקובץ כמקור האמת +3. אם במצב הזה המשתמש לא ביקש revisions מפורשים — **אל תייצא מחדש** (הקובץ שהועלה *הוא* הטיוטה העדכנית). דווח למשתמש ששמרת את העריכה כמקור האמת, והצע revisions אם נדרש +4. אם המשתמש ביקש שינויים (למשל "הוסף פסק הלכה X" / "תקן את הבלוק"): + - הרץ `list_bookmarks` כדי לראות אילו אנקורים זמינים + - בנה רשימת revisions (ראה פורמט למטה) + - הרץ `revise_draft` — זה ייצור `טיוטה-v{N+1}.docx` חדשה עם Track Changes + ### שלב 2: בדיקה סופית מהירה 1. הרץ `validate_decision` — בדוק שאין כשלים קריטיים 2. בדוק שכל 12 הבלוקים (א-יב) קיימים ומלאים @@ -54,9 +75,30 @@ tools: 6. 
בדוק שסטטוס ה-QA הוא "passed" — אם ה-QA לא רץ או נכשל, **אל תייצא** ### שלב 3: ייצוא DOCX + +**מצב א' — ייצוא ראשוני (אין active_draft):** 1. קרא את סקייל legal-docx (SKILL.md) כדי להבין את דרישות העיצוב -2. השתמש ב-`export_docx` לייצוא ראשוני לקובץ זמני -3. אם הסקריפט `create-legal-doc.js` מתאים יותר (למשל לעיצוב מותאם) — השתמש בו +2. השתמש ב-`export_docx` לייצוא ראשוני +3. ה-tool יוסיף bookmarks ב-12 הבלוקים ויסמן את הקובץ כ-active_draft_path + +**מצב ב' — יש active_draft + המשתמש ביקש שינויים:** + +1. בנה רשימת revisions ב-JSON. פורמט כל revision: + ```json + { + "id": "r1", + "type": "insert_after", // או insert_before, replace, delete + "anchor_bookmark": "block-yod", // מ-list_bookmarks + "content": "וכך נפסק בעניין פלוני. בבג\"ץ 1234/21 קבע השופט...", + "style": "body", // או heading, quote + "reason": "הוספת פסק הלכה שחסר לפי בקשת יו\"ר" + } + ``` +2. הפעל `revise_draft` — ייצור `טיוטה-v{N+1}.docx` עם `<w:ins>` / `<w:del>` — המשתמש יקבל/ידחה ב-Word +3. דווח למשתמש על הגרסה החדשה ו-applied/failed count + +**מצב ג' — יש active_draft אך המשתמש לא ביקש שינוי ספציפי:** +הטיוטה כבר עדכנית (המשתמש ערך ב-Word). אל תייצא מחדש. דווח: "הקובץ העדכני הוא `<active_draft_path>`. רוצה שאבצע שינויים ממוקדים?" ### שלב 4: שמירה מגורסת 1. צור תיקייה `~/legal-ai/data/cases/{מספר-ערר}/exports/` (אם לא קיימת) diff --git a/.claude/agents/legal-writer.md b/.claude/agents/legal-writer.md index c9ea6a2..c796d01 100644 --- a/.claude/agents/legal-writer.md +++ b/.claude/agents/legal-writer.md @@ -78,6 +78,28 @@ tools: ## תהליך עבודה +### מצב revision — תוספת נקודתית לטיוטה קיימת + +כש-CEO מבקש **תוספת נקודתית** (לא כתיבה מאפס) — למשל "הוסף פסק הלכה X בבלוק י" — המצב הוא: + +- המשתמש העלה `עריכה-v*.docx` והוא ה-`active_draft_path` +- נדרש ניסוח של פסקה/פסקאות בסגנון דפנה להכנסה ב-Track Changes +- **אסור להשתמש ב-`save_block_content`** — ה-revision חי בקובץ, לא ב-DB + +**זרימה:** + +1. קרא `get_block_context(case_number, block_id)` להקשר +2. קרא `get_style_guide()` לוודא סגנון דפנה +3. 
# MCP tool wrappers for the Track Changes workflow. Each one only forwards its
# arguments to the same-named coroutine in the `drafting` module and returns
# that coroutine's string result unchanged.
#
# NOTE(review): the Hebrew docstrings are presumably what the MCP framework
# exposes to agents as the tool description, so they are kept verbatim here —
# confirm before translating them.


@mcp.tool()
async def apply_user_edit(case_number: str, edit_filename: str) -> str:
    """רישום עריכה שהעלה המשתמש (עריכה-v*.docx) כמקור האמת החדש — מזריק bookmarks אם חסר."""
    # Docstring in English: register a user-uploaded edit (עריכה-v*.docx) as
    # the new source of truth; injects bookmarks when they are missing.
    return await drafting.apply_user_edit(case_number, edit_filename)


@mcp.tool()
async def list_bookmarks(case_number: str) -> str:
    """רשימת bookmarks הקיימים ב-active_draft של התיק (אנקורים ל-revisions)."""
    # Docstring in English: list the bookmarks present in the case's
    # active_draft (the anchors that revisions can target).
    return await drafting.list_bookmarks(case_number)


@mcp.tool()
async def revise_draft(case_number: str, revisions_json: str,
                       author: str = "מערכת AI") -> str:
    """החלת revisions (Track Changes) על ה-active_draft, יוצר טיוטה-v{N+1}.docx חדשה."""
    # Docstring in English: apply revisions (Track Changes) to the
    # active_draft, producing a new טיוטה-v{N+1}.docx.
    # `author` defaults to the Hebrew for "AI system" and is passed through
    # as-is to drafting.revise_draft.
    return await drafting.revise_draft(case_number, revisions_json, author)
async def set_active_draft_path(case_id: UUID, path: str | None) -> None:
    """Store *path* as the case's ``active_draft_path`` and bump ``updated_at``.

    Args:
        case_id: primary key of the row in ``cases``.
        path: path of the DOCX that is now the source of truth, or ``None``
            to clear the column.
    """
    statement = "UPDATE cases SET active_draft_path = $1, updated_at = now() WHERE id = $2"
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(statement, path, case_id)


async def get_active_draft_path(case_id: UUID) -> str | None:
    """Return the case's ``active_draft_path``.

    Returns ``None`` both when no row matches *case_id* and when the column
    itself is NULL.
    """
    query = "SELECT active_draft_path FROM cases WHERE id = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, case_id)
    if not record:
        return None
    return record["active_draft_path"]
# ── Bookmark helpers ──────────────────────────────────────────────

# Starting value of the per-document bookmark id counter. Bookmark ids must be
# unique across the whole document; starting high avoids colliding with ids
# that Word's default template may already have assigned.
_BOOKMARK_ID_START = 10000


def _insert_bookmark_start(paragraph, name: str, bm_id: int) -> None:
    """Insert a ``<w:bookmarkStart>`` at the beginning of a paragraph's content.

    Args:
        paragraph: python-docx paragraph whose underlying ``_p`` element is edited.
        name: bookmark name (``w:name``).
        bm_id: bookmark id (``w:id``); must match the paired bookmarkEnd.

    The marker is placed *after* ``<w:pPr>`` when the paragraph has one:
    paragraph properties must stay the first child of ``<w:p>``, so inserting
    at index 0 unconditionally would push them out of position.
    """
    el = OxmlElement("w:bookmarkStart")
    el.set(qn("w:id"), str(bm_id))
    el.set(qn("w:name"), name)

    p_el = paragraph._p
    p_pr = p_el.find(qn("w:pPr"))
    if p_pr is None:
        p_el.insert(0, el)
    else:
        p_pr.addnext(el)


def _insert_bookmark_end(paragraph, bm_id: int) -> None:
    """Append a ``<w:bookmarkEnd>`` with id *bm_id* as the paragraph's last child."""
    el = OxmlElement("w:bookmarkEnd")
    el.set(qn("w:id"), str(bm_id))
    paragraph._p.append(el)


def _wrap_block_with_bookmarks(doc, block_name: str,
                               write_block_fn, bm_counter: list[int]) -> None:
    """Run *write_block_fn* and bookmark the paragraphs it added to *doc*.

    Args:
        doc: python-docx document being built.
        block_name: name given to the bookmark (e.g. ``"block-yod"``).
        write_block_fn: zero-argument callable that writes the block into *doc*.
        bm_counter: one-element list used as a mutable id counter shared by
            the caller across blocks; it is incremented before each use.

    The bookmark starts in the first top-level paragraph the callable added
    and ends in the last one. If the callable added no top-level paragraph,
    nothing is inserted and the counter is left untouched.
    """
    body = doc.element.body
    p_tag = qn("w:p")

    # Count only direct <w:p> children of the body so the numbers line up with
    # the indexing of doc.paragraphs used below.
    before_count = sum(1 for child in body if child.tag == p_tag)
    write_block_fn()
    after_count = sum(1 for child in body if child.tag == p_tag)
    if after_count == before_count:
        return

    paragraphs = doc.paragraphs
    first_new = paragraphs[before_count]
    last_new = paragraphs[after_count - 1]

    bm_counter[0] += 1
    bm_id = bm_counter[0]
    _insert_bookmark_start(first_new, block_name, bm_id)
    _insert_bookmark_end(last_new, bm_id)
# ── Block identification ──────────────────────────────────────────

# The 12 decision blocks in document order, each with its Hebrew letter marker.
BLOCK_ORDER = [
    ("block-alef", "א"),
    ("block-bet", "ב"),
    ("block-gimel", "ג"),
    ("block-dalet", "ד"),
    ("block-heh", "ה"),
    ("block-vav", "ו"),
    ("block-zayin", "ז"),
    ("block-chet", "ח"),
    ("block-tet", "ט"),
    ("block-yod", "י"),
    ("block-yod-alef", "יא"),
    ("block-yod-bet", "יב"),
]

# Hebrew marker → block name.
_BLOCK_MARKERS_BY_LETTER: dict[str, str] = {
    marker: block_name for block_name, marker in BLOCK_ORDER
}

# Two-letter markers (יא, יב) are listed first so the alternation tries them
# before the bare 'י'.
_MARKER_ALTERNATION = "|".join(
    map(re.escape, sorted(_BLOCK_MARKERS_BY_LETTER, key=len, reverse=True))
)

# A paragraph "starts with a marker" when, after optional leading whitespace,
# it has one of the markers followed (optionally after whitespace) by '.',
# ')' or '-'. The punctuation is mandatory, which keeps ordinary words that
# merely begin with one of these letters from matching.
_BLOCK_MARKER_RE = re.compile(
    rf"^\s*({_MARKER_ALTERNATION})\s*[\.\)\-]\s*"
)

# Fallback heuristic for files where the letter markers are missing (common in
# user-edited Word files): section headings that typically open each block in
# the Daphna Tamir decision style — e.g. the panel line for alef, the
# appeal-number line for bet, the parties separator for gimel, a lone
# "החלטה" for dalet, background headings for heh, and so on down to the
# closing block. Patterns are regexes applied with ``search`` to the stripped
# paragraph text.
_BLOCK_HEADING_PATTERNS: list[tuple[str, list[str]]] = [
    ("block-alef", [r"בפני[:\s]", r"ועדת הערר"]),
    ("block-bet", [r"^ערר\s+מספר", r"^ערר\s+\d"]),
    ("block-gimel", [r"^נגד\s*$", r"^—\s*נגד\s*—"]),
    ("block-dalet", [r"^החלטה\s*$"]),
    ("block-heh", [r"^רקע\s*$", r"^רקע\s+עובדתי", r"^פתח\s+דבר"]),
    ("block-vav", [r"^תכניות\s+חלות", r"^ההליכים?\s+שבפנינו", r"^ההליכים?\s+בפני\s+הוועדה\s+המקומית"]),
    ("block-zayin", [r"^תמצית\s+טענות", r"^טענות\s+הצדדים", r"^טענות\s+העוררי"]),
    ("block-chet", [r"^תגובת\s+המשיב", r"^עמדת\s+הוועדה\s+המקומית", r"^תשובת"]),
    ("block-tet", [r"^ההליכים?\s+בפני\s+ועדת\s+הערר", r"^הדיון\s+בפנינו"]),
    ("block-yod", [r"^דיון\s+והכרעה", r"^דיון\s*$", r"^ההכרעה"]),
    ("block-yod-alef", [r"^סוף\s+דבר", r"^סיכום\s*$"]),
    ("block-yod-bet", [r"^ההחלטה\s*$", r"^על\s+כן[,\.]?"]),
]

_COMPILED_HEADING_PATTERNS: list[tuple[str, list[re.Pattern[str]]]] = [
    (block_name, [re.compile(source) for source in sources])
    for block_name, sources in _BLOCK_HEADING_PATTERNS
]


def _paragraph_text(p: etree._Element) -> str:
    """Return every text node under *p* concatenated, with outer whitespace stripped."""
    pieces = p.itertext()
    return "".join(pieces).strip()


def _detect_block_starts(
    paragraphs: list[etree._Element],
) -> dict[str, int]:
    """Map block name → index of the paragraph where that block starts.

    Single forward pass. Each non-empty paragraph yields at most one candidate
    block: the letter marker at its start if there is one, otherwise the first
    not-yet-reached block whose heading patterns match. A candidate is
    accepted only if it has not been assigned and does not precede the last
    accepted block in ``BLOCK_ORDER``; accepting it rules out every earlier
    block for the rest of the scan. A paragraph whose letter marker is
    rejected is *not* retried against the heading patterns.
    """
    position_of = {name: pos for pos, (name, _) in enumerate(BLOCK_ORDER)}
    total_blocks = len(position_of)

    starts: dict[str, int] = {}
    next_pos = 0  # lowest BLOCK_ORDER position that may still be assigned

    for para_idx, para in enumerate(paragraphs):
        if next_pos >= total_blocks:
            # The last block has been placed; nothing further can be accepted.
            break

        text = _paragraph_text(para)
        if not text:
            continue

        marker = _BLOCK_MARKER_RE.match(text)
        if marker is not None:
            candidate = _BLOCK_MARKERS_BY_LETTER.get(marker.group(1))
        else:
            candidate = None

        if candidate is None:
            # Heading-keyword fallback, limited to blocks still ahead of us.
            candidate = next(
                (
                    name
                    for name, patterns in _COMPILED_HEADING_PATTERNS
                    if name not in starts
                    and position_of[name] >= next_pos
                    and any(pattern.search(text) for pattern in patterns)
                ),
                None,
            )

        if candidate is None or candidate in starts:
            continue

        candidate_pos = position_of[candidate]
        if candidate_pos < next_pos:
            continue

        starts[candidate] = para_idx
        next_pos = candidate_pos + 1

    return starts
def _insert_bookmark_around_range(
    body: etree._Element,
    paragraphs: list[etree._Element],
    start_idx: int,
    end_idx: int,
    name: str,
    bm_id: int,
) -> None:
    """Bookmark the paragraph range ``paragraphs[start_idx..end_idx]``.

    A ``w:bookmarkStart`` (id *bm_id*, name *name*) is placed at the start of
    the first paragraph's content and a matching ``w:bookmarkEnd`` is appended
    to the last paragraph. *body* is accepted for signature compatibility and
    is not used.

    The start marker goes after ``w:pPr`` when the paragraph has one, because
    paragraph properties must remain the first child of ``w:p``.
    """
    start_el = etree.Element(_w("bookmarkStart"))
    start_el.set(_w("id"), str(bm_id))
    start_el.set(_w("name"), name)

    end_el = etree.Element(_w("bookmarkEnd"))
    end_el.set(_w("id"), str(bm_id))

    start_p = paragraphs[start_idx]
    end_p = paragraphs[end_idx]

    first_child = start_p[0] if len(start_p) else None
    if first_child is not None and first_child.tag == _w("pPr"):
        first_child.addnext(start_el)
    else:
        start_p.insert(0, start_el)
    end_p.append(end_el)


def _next_bookmark_id(doc_tree: etree._Element) -> int:
    """Return an unused bookmark id: one above the largest existing
    ``w:bookmarkStart`` id, and never below 10000. Non-numeric ids are ignored."""
    max_id = 9999
    for el in doc_tree.iterfind(".//w:bookmarkStart", NSMAP):
        raw = el.get(_w("id"))
        if not raw:
            continue
        try:
            max_id = max(max_id, int(raw))
        except ValueError:
            continue
    return max_id + 1


# ── Public API ────────────────────────────────────────────────────


def retrofit_bookmarks(
    docx_path: str | Path,
    *,
    output_path: str | Path | None = None,
    backup: bool = True,
) -> dict:
    """Inject block-* bookmarks into an existing DOCX via heuristic detection.

    Args:
        docx_path: path to the DOCX file (modified in place unless
            *output_path* is given).
        output_path: if given, write the result here instead of overwriting.
        backup: when writing in place, first copy the original next to it
            with the suffix ``.pre-retrofit.docx``.

    Returns:
        ``{'bookmarks_added': [...], 'missing_blocks': [...],
        'existing_bookmarks': [...]}`` — names added by this call, blocks that
        were neither detected nor already bookmarked, and bookmark names that
        were already in the document.

    Raises:
        FileNotFoundError: *docx_path* does not exist.
        ValueError: the document XML has no ``w:body`` element.

    When an in-place call finds nothing to add, the file on disk is left
    untouched and no backup is taken, so repeating the call cannot overwrite
    an earlier backup of the pristine upload with an already-retrofitted copy.
    """
    docx_path = Path(docx_path)
    if not docx_path.exists():
        raise FileNotFoundError(str(docx_path))

    output_path = docx_path if output_path is None else Path(output_path)
    in_place = output_path.resolve() == docx_path.resolve()

    members, doc_tree, settings_tree = _load_docx_xml(docx_path)

    # Names of bookmarks already present (order preserved for the report).
    existing_names: list[str] = []
    for el in doc_tree.iterfind(".//w:bookmarkStart", NSMAP):
        bm_name = el.get(_w("name"))
        if bm_name:
            existing_names.append(bm_name)
    already_present = set(existing_names)

    # Only top-level body paragraphs are considered; paragraphs nested in
    # tables etc. are deliberately ignored (MVP).
    body = doc_tree.find(f".//{_w('body')}")
    if body is None:
        raise ValueError("document has no <w:body>")
    paragraphs = [p for p in body if p.tag == _w("p")]

    if not paragraphs:
        return {
            "bookmarks_added": [],
            "missing_blocks": [n for n, _ in BLOCK_ORDER],
            "existing_bookmarks": existing_names,
        }

    block_starts = _detect_block_starts(paragraphs)

    # Each block runs up to the paragraph before the next detected block's
    # start; the last detected block runs to the final paragraph.
    ordered_found = sorted(block_starts.items(), key=lambda kv: kv[1])
    last_paragraph = len(paragraphs) - 1
    ranges: list[tuple[str, int, int]] = []
    for i, (name, start_idx) in enumerate(ordered_found):
        if i + 1 < len(ordered_found):
            end_idx = ordered_found[i + 1][1] - 1
        else:
            end_idx = last_paragraph
        ranges.append((name, start_idx, max(start_idx, end_idx)))

    pending = [r for r in ranges if r[0] not in already_present]

    added: list[str] = []
    if pending or not in_place:
        # Back up only when the original is really about to be replaced.
        if pending and backup and in_place:
            backup_path = docx_path.with_suffix(".pre-retrofit.docx")
            shutil.copy2(str(docx_path), str(backup_path))

        next_id = _next_bookmark_id(doc_tree)
        for name, s, e in pending:
            _insert_bookmark_around_range(body, paragraphs, s, e, name, next_id)
            added.append(name)
            next_id += 1

        _save_docx_xml(members, doc_tree, settings_tree, output_path)

    missing = [
        n for n, _ in BLOCK_ORDER
        if n not in block_starts and n not in already_present
    ]
    logger.info("retrofit %s: added=%s missing=%s",
                docx_path.name, added, missing)
    return {
        "bookmarks_added": added,
        "missing_blocks": missing,
        "existing_bookmarks": existing_names,
    }
# ── XML namespaces ─────────────────────────────────────────────────

W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
NSMAP = {"w": W_NS}


def _w(tag: str) -> str:
    """Return *tag* in Clark notation (``{namespace}tag``) for the ``w:`` namespace."""
    return "{" + W_NS + "}" + tag


# ── Data models ────────────────────────────────────────────────────


RevisionType = Literal["insert_after", "insert_before", "replace", "delete"]
StyleType = Literal["body", "quote", "heading", "bold"]


@dataclass
class Revision:
    """One tracked change requested against the DOCX."""

    # Caller-chosen identifier, echoed back in the matching RevisionResult.
    id: str
    # Which operation to perform relative to the anchor.
    type: RevisionType
    # Name of the bookmark the change is positioned against.
    anchor_bookmark: str
    # Text of the new paragraph (insert / replace).
    content: str = ""
    # Formatting preset applied to the new paragraph.
    style: StyleType = "body"
    # Free-text justification supplied by the caller.
    reason: str = ""
    # NOTE(review): not read by the insert/delete helpers visible in this
    # chunk — confirm where (or whether) it is honoured.
    anchor_position: Literal["start", "end"] = "end"


@dataclass
class RevisionResult:
    """Outcome of applying one Revision."""

    # Id of the Revision this result belongs to.
    id: str
    status: Literal["applied", "failed"]
    # Human-readable failure reason; None on success.
    error: str | None = None
    # w:id of the tracked-change element that was written, when applied.
    ins_id: int | None = None


@dataclass
class RevisionBatchResult:
    """Totals and per-revision results for a batch of revisions."""

    applied: int = 0
    failed: int = 0
    # default_factory gives every instance its own list.
    results: list[RevisionResult] = field(default_factory=list)
    output_path: str = ""
def _load_docx_xml(docx_path: Path) -> tuple[dict[str, bytes], etree._Element, etree._Element]:
    """Read a DOCX into memory and parse its document and settings parts.

    Args:
        docx_path: path of the ``.docx`` (zip) file.

    Returns:
        ``(members, document_tree, settings_tree)`` where *members* maps each
        zip member name to its raw bytes, *document_tree* is the parsed
        ``word/document.xml`` and *settings_tree* is the parsed
        ``word/settings.xml`` — or a fresh empty ``w:settings`` element when
        that part is absent or empty.

    Raises:
        ValueError: the archive has no ``word/document.xml``.

    The files handled here include user uploads, so the XML is parsed with
    entity resolution and network access disabled; a crafted DOCX therefore
    cannot pull local files or remote resources in through entity
    declarations.
    """
    with zipfile.ZipFile(docx_path, "r") as zf:
        members: dict[str, bytes] = {name: zf.read(name) for name in zf.namelist()}

    if "word/document.xml" not in members:
        raise ValueError(f"{docx_path}: missing word/document.xml")

    parser = etree.XMLParser(resolve_entities=False, no_network=True)
    document_tree = etree.fromstring(members["word/document.xml"], parser)

    settings_bytes = members.get("word/settings.xml")
    if settings_bytes:
        settings_tree = etree.fromstring(settings_bytes, parser)
    else:
        settings_tree = etree.Element(_w("settings"), nsmap=NSMAP)

    return members, document_tree, settings_tree


def _save_docx_xml(
    members: dict[str, bytes],
    document_tree: etree._Element,
    settings_tree: etree._Element,
    output_path: Path,
) -> None:
    """Write a DOCX to *output_path* with refreshed document/settings XML.

    *members* is copied, not mutated. ``word/document.xml`` and
    ``word/settings.xml`` are replaced by the serialised trees; every other
    member is written back byte-for-byte. Parent directories of *output_path*
    are created as needed.

    NOTE(review): ``word/settings.xml`` is written even when the source
    archive had none; nothing here registers it in the package relationships —
    confirm Word picks it up in that case.
    """
    updated = dict(members)
    for part_name, tree in (
        ("word/document.xml", document_tree),
        ("word/settings.xml", settings_tree),
    ):
        updated[part_name] = etree.tostring(
            tree, xml_declaration=True, encoding="UTF-8", standalone=True
        )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Build the archive in memory first so the target is written in one call.
    buffer = BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for name, data in updated.items():
            zf.writestr(name, data)
    output_path.write_bytes(buffer.getvalue())
def _ensure_track_revisions(settings_tree: etree._Element) -> None:
    """Make sure ``<w:trackRevisions>`` is present and switched on in settings.

    A missing element is added with ``w:val="true"``. An existing element that
    is explicitly switched off (``w:val`` of ``false``, ``0`` or ``off``) is
    flipped to ``true``; an element with no ``w:val`` or a truthy one is left
    as it is.

    NOTE(review): a new element is appended as the last child of the settings
    root; confirm Word accepts it regardless of its position among siblings.
    """
    flag = settings_tree.find(_w("trackRevisions"))
    if flag is None:
        flag = etree.SubElement(settings_tree, _w("trackRevisions"))
        flag.set(_w("val"), "true")
        return

    current = flag.get(_w("val"))
    if current is not None and current.strip().lower() in ("false", "0", "off"):
        flag.set(_w("val"), "true")


def _next_revision_id(document_tree: etree._Element) -> int:
    """Return one more than the largest integer ``w:id`` found on insertion,
    deletion, bookmark and comment markers in the document (1 if none)."""
    tracked_paths = (
        ".//w:ins", ".//w:del", ".//w:bookmarkStart", ".//w:bookmarkEnd",
        ".//w:commentRangeStart", ".//w:comment",
    )
    id_attr = _w("id")
    highest = 0
    for path in tracked_paths:
        for el in document_tree.iterfind(path, NSMAP):
            raw = el.get(id_attr)
            if not raw:
                continue
            try:
                highest = max(highest, int(raw))
            except ValueError:
                # Non-numeric ids cannot collide with the integers we hand out.
                continue
    return highest + 1


def _find_bookmark(
    document_tree: etree._Element, name: str
) -> tuple[etree._Element | None, etree._Element | None]:
    """Locate a bookmark's start and end markers by bookmark name.

    Returns ``(start, end)``. Both are ``None`` when no ``w:bookmarkStart``
    carries *name*; *end* alone is ``None`` when no ``w:bookmarkEnd`` shares
    the start's ``w:id``. The first match in document order wins.
    """
    start = next(
        (
            el for el in document_tree.iterfind(".//w:bookmarkStart", NSMAP)
            if el.get(_w("name")) == name
        ),
        None,
    )
    if start is None:
        return None, None

    bm_id = start.get(_w("id"))
    end = next(
        (
            el for el in document_tree.iterfind(".//w:bookmarkEnd", NSMAP)
            if el.get(_w("id")) == bm_id
        ),
        None,
    )
    return start, end


def _find_enclosing_paragraph(element: etree._Element) -> etree._Element | None:
    """Return the nearest ``w:p`` that is *element* itself or one of its
    ancestors, or ``None`` if there is none."""
    node = element
    while node is not None and node.tag != _w("p"):
        node = node.getparent()
    return node
def _build_run(text: str, *, bold: bool = False, italic: bool = False,
               font: str = "David", size_half_pt: int | None = None) -> etree._Element:
    """Create a ``w:r`` holding *text*, marked RTL and set in *font*.

    Args:
        text: run text; stored in a ``w:t`` with ``xml:space="preserve"``.
        bold: add ``w:b`` / ``w:bCs``.
        italic: add ``w:i`` / ``w:iCs``.
        font: face written to the ascii, hAnsi and cs font slots.
        size_half_pt: font size in half-points for ``w:sz`` / ``w:szCs``;
            omitted when ``None``.
    """
    run = etree.Element(_w("r"))
    props = etree.SubElement(run, _w("rPr"))

    fonts = etree.SubElement(props, _w("rFonts"))
    for slot in ("ascii", "hAnsi", "cs"):
        fonts.set(_w(slot), font)
    fonts.set(_w("hint"), "cs")

    if size_half_pt is not None:
        for size_tag in ("sz", "szCs"):
            etree.SubElement(props, _w(size_tag)).set(_w("val"), str(size_half_pt))

    if bold:
        for bold_tag in ("b", "bCs"):
            etree.SubElement(props, _w(bold_tag))
    if italic:
        for italic_tag in ("i", "iCs"):
            etree.SubElement(props, _w(italic_tag))

    etree.SubElement(props, _w("rtl"))

    text_el = etree.SubElement(run, _w("t"))
    # Keep leading/trailing spaces of the text intact.
    text_el.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
    text_el.text = text
    return run


def _build_paragraph(text: str, *, style: StyleType = "body") -> etree._Element:
    """Create a ``w:p`` with bidi on, ``jc="right"``, an RTL paragraph mark and
    a single run containing *text*.

    Style presets: ``heading`` → bold, 14pt; ``bold`` → bold; ``quote`` →
    italic, 11pt; anything else → plain run at the default size.

    NOTE(review): how Word renders ``jc="right"`` on a bidi paragraph should
    be checked against a real document.
    """
    # style → (bold, italic, size in half-points)
    presets = {
        "heading": (True, False, 28),
        "bold": (True, False, None),
        "quote": (False, True, 22),
    }
    is_bold, is_italic, half_points = presets.get(style, (False, False, None))

    para = etree.Element(_w("p"))
    para_props = etree.SubElement(para, _w("pPr"))
    etree.SubElement(para_props, _w("bidi")).set(_w("val"), "1")
    etree.SubElement(para_props, _w("jc")).set(_w("val"), "right")

    mark_props = etree.SubElement(para_props, _w("rPr"))
    etree.SubElement(mark_props, _w("rtl"))

    para.append(
        _build_run(text, bold=is_bold, italic=is_italic, size_half_pt=half_points)
    )
    return para


def _wrap_in_ins(elements: list[etree._Element], *, ins_id: int,
                 author: str, date_iso: str) -> etree._Element:
    """Return a new ``w:ins`` (id, author, date set) containing *elements*,
    which are expected to be run-level nodes, in the given order."""
    wrapper = etree.Element(_w("ins"))
    for attr, value in (("id", str(ins_id)), ("author", author), ("date", date_iso)):
        wrapper.set(_w(attr), value)
    wrapper.extend(elements)
    return wrapper
The paragraph's pPr/rPr gets an marker for the paragraph + mark itself (pilcrow) — this uses its *own* id. + """ + if mark_id is None: + mark_id = ins_id + p = _build_paragraph(text, style=style) + pPr = p.find(_w("pPr")) + assert pPr is not None + rPr = pPr.find(_w("rPr")) + if rPr is None: + rPr = etree.SubElement(pPr, _w("rPr")) + ins_mark = etree.SubElement(rPr, _w("ins")) + ins_mark.set(_w("id"), str(mark_id)) + ins_mark.set(_w("author"), author) + ins_mark.set(_w("date"), date_iso) + + runs = [child for child in list(p) if child.tag == _w("r")] + if runs: + for r in runs: + p.remove(r) + ins = _wrap_in_ins(runs, ins_id=ins_id, author=author, date_iso=date_iso) + p.append(ins) + return p + + +def _mark_runs_as_deleted(paragraph: etree._Element, *, del_id: int, + author: str, date_iso: str) -> None: + """Convert all in a paragraph to -wrapped runs. + + Within a , must become . + """ + runs = [child for child in list(paragraph) if child.tag == _w("r")] + if not runs: + return + # Convert inside each run + for r in runs: + for t in r.findall(_w("t")): + t.tag = _w("delText") + paragraph.remove(r) + wrapper = etree.Element(_w("del")) + wrapper.set(_w("id"), str(del_id)) + wrapper.set(_w("author"), author) + wrapper.set(_w("date"), date_iso) + for r in runs: + wrapper.append(r) + paragraph.append(wrapper) + + +# ── Revision application ─────────────────────────────────────────── + + +def _apply_insert( + document_tree: etree._Element, + revision: Revision, + *, + ins_id: int, + author: str, + date_iso: str, +) -> RevisionResult: + """Apply insert_after / insert_before relative to a bookmark.""" + start, end = _find_bookmark(document_tree, revision.anchor_bookmark) + if start is None: + return RevisionResult(id=revision.id, status="failed", + error=f"bookmark '{revision.anchor_bookmark}' not found") + + # Pick anchor element based on position + if revision.type == "insert_before": + anchor = start + else: # insert_after — default + anchor = end if end is not None 
else start + + enclosing_p = _find_enclosing_paragraph(anchor) + if enclosing_p is None: + return RevisionResult(id=revision.id, status="failed", + error="anchor has no enclosing paragraph") + + # Build new tracked paragraph. ins_id for run wrapper, ins_id+1 for mark. + new_p = _make_tracked_paragraph_insert( + revision.content, style=revision.style, + ins_id=ins_id, mark_id=ins_id + 1, + author=author, date_iso=date_iso, + ) + + parent = enclosing_p.getparent() + if parent is None: + return RevisionResult(id=revision.id, status="failed", + error="enclosing paragraph has no parent") + idx = list(parent).index(enclosing_p) + insert_idx = idx if revision.type == "insert_before" else idx + 1 + parent.insert(insert_idx, new_p) + + return RevisionResult(id=revision.id, status="applied", ins_id=ins_id) + + +def _apply_delete( + document_tree: etree._Element, + revision: Revision, + *, + del_id: int, + author: str, + date_iso: str, +) -> RevisionResult: + """Mark the paragraph enclosed by a bookmark as deleted.""" + start, end = _find_bookmark(document_tree, revision.anchor_bookmark) + if start is None: + return RevisionResult(id=revision.id, status="failed", + error=f"bookmark '{revision.anchor_bookmark}' not found") + + enclosing_p = _find_enclosing_paragraph(start) + if enclosing_p is None: + return RevisionResult(id=revision.id, status="failed", + error="anchor has no enclosing paragraph") + + _mark_runs_as_deleted(enclosing_p, del_id=del_id, + author=author, date_iso=date_iso) + return RevisionResult(id=revision.id, status="applied", ins_id=del_id) + + +def _apply_replace( + document_tree: etree._Element, + revision: Revision, + *, + ins_id: int, + del_id: int, + author: str, + date_iso: str, +) -> RevisionResult: + """Replace = delete the existing paragraph + insert new one after it.""" + start, end = _find_bookmark(document_tree, revision.anchor_bookmark) + if start is None: + return RevisionResult(id=revision.id, status="failed", + error=f"bookmark 
'{revision.anchor_bookmark}' not found") + + enclosing_p = _find_enclosing_paragraph(start) + if enclosing_p is None: + return RevisionResult(id=revision.id, status="failed", + error="anchor has no enclosing paragraph") + + parent = enclosing_p.getparent() + if parent is None: + return RevisionResult(id=revision.id, status="failed", + error="enclosing paragraph has no parent") + + new_p = _make_tracked_paragraph_insert( + revision.content, style=revision.style, + ins_id=ins_id, mark_id=ins_id + 1, + author=author, date_iso=date_iso, + ) + idx = list(parent).index(enclosing_p) + parent.insert(idx + 1, new_p) + + _mark_runs_as_deleted(enclosing_p, del_id=del_id, + author=author, date_iso=date_iso) + return RevisionResult(id=revision.id, status="applied", ins_id=ins_id) + + +# ── Public API ───────────────────────────────────────────────────── + + +def apply_tracked_revisions( + source_path: str | Path, + output_path: str | Path, + revisions: list[Revision], + *, + author: str = "מערכת AI", + date: datetime | None = None, +) -> RevisionBatchResult: + """Apply a batch of tracked revisions to a DOCX, producing a new DOCX. + + The source file is never mutated. Output is a new DOCX with / + markers that Word renders as Track Changes (Accept/Reject). + + Args: + source_path: existing DOCX (e.g. עריכה-v1.docx) — retains user edits. + output_path: where to write the revised DOCX (e.g. טיוטה-v6.docx). + revisions: list of Revision objects. Anchors are bookmark names. + author: displayed as the revision author in Word. + date: revision timestamp (defaults to now, UTC). + + Returns: + RevisionBatchResult with per-revision status. 
+ """ + source_path = Path(source_path) + output_path = Path(output_path) + + if date is None: + date = datetime.now(timezone.utc) + date_iso = date.strftime("%Y-%m-%dT%H:%M:%SZ") + + members, doc_tree, settings_tree = _load_docx_xml(source_path) + _ensure_track_revisions(settings_tree) + + next_id = _next_revision_id(doc_tree) + + batch = RevisionBatchResult() + for rev in revisions: + try: + if rev.type in ("insert_after", "insert_before"): + result = _apply_insert(doc_tree, rev, ins_id=next_id, + author=author, date_iso=date_iso) + # insert consumes 2 IDs: run-wrapper + paragraph-mark + next_id += 2 + elif rev.type == "delete": + result = _apply_delete(doc_tree, rev, del_id=next_id, + author=author, date_iso=date_iso) + next_id += 1 + elif rev.type == "replace": + result = _apply_replace(doc_tree, rev, + ins_id=next_id, del_id=next_id + 2, + author=author, date_iso=date_iso) + # replace consumes 3 IDs: ins-run, ins-mark, del + next_id += 3 + else: + result = RevisionResult(id=rev.id, status="failed", + error=f"unknown type: {rev.type}") + except Exception as e: # pragma: no cover - defensive + logger.exception("revision %s failed", rev.id) + result = RevisionResult(id=rev.id, status="failed", error=str(e)) + + batch.results.append(result) + if result.status == "applied": + batch.applied += 1 + else: + batch.failed += 1 + + _save_docx_xml(members, doc_tree, settings_tree, output_path) + batch.output_path = str(output_path) + logger.info("applied %d revisions (failed %d) → %s", + batch.applied, batch.failed, output_path) + return batch + + +def list_bookmarks(docx_path: str | Path) -> list[str]: + """Return bookmark names present in the DOCX (excluding '_' internal ones).""" + docx_path = Path(docx_path) + members, doc_tree, _ = _load_docx_xml(docx_path) + names: list[str] = [] + for el in doc_tree.iterfind(".//w:bookmarkStart", NSMAP): + name = el.get(_w("name")) + if name and not name.startswith("_"): + names.append(name) + return names + + +def 
copy_with_revisions( + source_path: str | Path, output_path: str | Path, +) -> None: + """Copy source → output unchanged (used when revisions list is empty).""" + shutil.copy2(str(source_path), str(output_path)) diff --git a/mcp-server/src/legal_mcp/tools/drafting.py b/mcp-server/src/legal_mcp/tools/drafting.py index 75d3304..e70b7a2 100644 --- a/mcp-server/src/legal_mcp/tools/drafting.py +++ b/mcp-server/src/legal_mcp/tools/drafting.py @@ -384,6 +384,9 @@ async def validate_decision(case_number: str) -> str: async def export_docx(case_number: str, output_path: str = "") -> str: """ייצוא החלטה לקובץ DOCX מעוצב — גופן David, RTL, כותרות, מספור סעיפים. + הקובץ נוצר עם bookmarks ב-12 הבלוקים (אנקורים ל-revisions עתידיים), + ומסומן כ-active_draft_path של התיק. + Args: case_number: מספר תיק הערר output_path: נתיב לשמירה (אופציונלי — ברירת מחדל: תיקיית התיק) @@ -398,9 +401,12 @@ async def export_docx(case_number: str, output_path: str = "") -> str: try: path = await docx_exporter.export_decision(case_id, output_path or None) + # Register this export as the new source of truth + await db.set_active_draft_path(case_id, path) return json.dumps({ "status": "completed", "path": path, + "active_draft_path": path, "message": f"DOCX נוצר: {path}", }, ensure_ascii=False, indent=2) except ValueError as e: @@ -410,6 +416,163 @@ async def export_docx(case_number: str, output_path: str = "") -> str: }, ensure_ascii=False, indent=2) +async def apply_user_edit(case_number: str, edit_filename: str) -> str: + """רישום עריכה שהעלה המשתמש כמקור האמת החדש של התיק. + + התהליך: + 1. מאתר את הקובץ `עריכה-v*.docx` בתיקיית ה-exports + 2. מזריק bookmarks רטרואקטיבית (אם אין) דרך docx_retrofit + 3. 
מעדכן את cases.active_draft_path + + Args: + case_number: מספר תיק הערר + edit_filename: שם הקובץ (למשל "עריכה-v1.docx") או נתיב מלא + """ + from legal_mcp.services import docx_retrofit + + case = await db.get_case_by_number(case_number) + if not case: + return json.dumps({"status": "error", + "message": f"תיק {case_number} לא נמצא."}, + ensure_ascii=False, indent=2) + + case_id = UUID(case["id"]) + export_dir = config.find_case_dir(case_number) / "exports" + edit_path = export_dir / edit_filename if "/" not in edit_filename else Path(edit_filename) + if not edit_path.exists(): + return json.dumps({"status": "error", + "message": f"קובץ לא נמצא: {edit_path}"}, + ensure_ascii=False, indent=2) + + try: + retrofit_result = docx_retrofit.retrofit_bookmarks(edit_path) + await db.set_active_draft_path(case_id, str(edit_path)) + return json.dumps({ + "status": "completed", + "active_draft_path": str(edit_path), + "bookmarks_added": retrofit_result.get("bookmarks_added", []), + "missing_blocks": retrofit_result.get("missing_blocks", []), + "existing_bookmarks": retrofit_result.get("existing_bookmarks", []), + }, ensure_ascii=False, indent=2) + except Exception as e: + return json.dumps({"status": "error", "message": str(e)}, + ensure_ascii=False, indent=2) + + +async def list_bookmarks(case_number: str) -> str: + """רשימת bookmarks הקיימים ב-active_draft של התיק. + + משמש לסוכנים כדי לדעת אילו אנקורים זמינים לפני שליחת revisions. + """ + from legal_mcp.services import docx_reviser + + case = await db.get_case_by_number(case_number) + if not case: + return json.dumps({"status": "error", + "message": f"תיק {case_number} לא נמצא."}, + ensure_ascii=False, indent=2) + + active_path = await db.get_active_draft_path(UUID(case["id"])) + if not active_path or not Path(active_path).exists(): + return json.dumps({"status": "no_active_draft", + "message": "לא נמצא active_draft. 
הרץ ייצוא או העלה עריכה."}, + ensure_ascii=False, indent=2) + + try: + names = docx_reviser.list_bookmarks(active_path) + return json.dumps({ + "status": "completed", + "active_draft_path": active_path, + "bookmarks": names, + }, ensure_ascii=False, indent=2) + except Exception as e: + return json.dumps({"status": "error", "message": str(e)}, + ensure_ascii=False, indent=2) + + +async def revise_draft(case_number: str, revisions_json: str, + author: str = "מערכת AI") -> str: + """החלת revisions מסומנים כ-Track Changes על ה-active_draft של התיק. + + יוצר קובץ חדש `טיוטה-v{N+1}.docx` (מגרסה הבאה בתור), ומעדכן את + active_draft_path אליו. + + Args: + case_number: מספר תיק הערר + revisions_json: JSON string של array עם אובייקטים: + [{"id": "r1", "type": "insert_after"|"insert_before"|"replace"|"delete", + "anchor_bookmark": "block-yod", "content": "...", "style": "body"|"heading"|"quote", + "reason": "..."}, ...] + author: מחרוזת המחבר שתופיע ב-Track Changes + """ + from legal_mcp.services import docx_reviser + + case = await db.get_case_by_number(case_number) + if not case: + return json.dumps({"status": "error", + "message": f"תיק {case_number} לא נמצא."}, + ensure_ascii=False, indent=2) + + case_id = UUID(case["id"]) + active_path = await db.get_active_draft_path(case_id) + if not active_path or not Path(active_path).exists(): + return json.dumps({"status": "error", + "message": "אין active_draft. 
הרץ ייצוא או apply_user_edit קודם."}, + ensure_ascii=False, indent=2) + + try: + raw = json.loads(revisions_json) if isinstance(revisions_json, str) else revisions_json + except json.JSONDecodeError as e: + return json.dumps({"status": "error", "message": f"JSON לא תקף: {e}"}, + ensure_ascii=False, indent=2) + + revisions = [] + for item in raw: + revisions.append(docx_reviser.Revision( + id=item.get("id", ""), + type=item["type"], + anchor_bookmark=item["anchor_bookmark"], + content=item.get("content", ""), + style=item.get("style", "body"), + reason=item.get("reason", ""), + anchor_position=item.get("anchor_position", "end"), + )) + + # Determine output path — next טיוטה-v{N}.docx + export_dir = config.find_case_dir(case_number) / "exports" + export_dir.mkdir(parents=True, exist_ok=True) + existing = list(export_dir.glob("טיוטה-v*.docx")) + next_ver = 1 + for p in existing: + try: + ver = int(p.stem.split("-v")[1]) + next_ver = max(next_ver, ver + 1) + except (IndexError, ValueError): + pass + output_path = export_dir / f"טיוטה-v{next_ver}.docx" + + try: + result = docx_reviser.apply_tracked_revisions( + active_path, output_path, revisions, author=author, + ) + await db.set_active_draft_path(case_id, str(output_path)) + return json.dumps({ + "status": "completed", + "output_path": str(output_path), + "version": next_ver, + "applied": result.applied, + "failed": result.failed, + "active_draft_path": str(output_path), + "results": [ + {"id": r.id, "status": r.status, "error": r.error} + for r in result.results + ], + }, ensure_ascii=False, indent=2) + except Exception as e: + return json.dumps({"status": "error", "message": str(e)}, + ensure_ascii=False, indent=2) + + async def get_block_context(case_number: str, block_id: str, instructions: str = "") -> str: """קבלת הקשר מלא לכתיבת בלוק — ללא קריאה ל-API. Claude Code כותב את הבלוק. 
diff --git a/mcp-server/tests/__init__.py b/mcp-server/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mcp-server/tests/test_docx_exporter_bookmarks.py b/mcp-server/tests/test_docx_exporter_bookmarks.py new file mode 100644 index 0000000..cd1ed7e --- /dev/null +++ b/mcp-server/tests/test_docx_exporter_bookmarks.py @@ -0,0 +1,103 @@ +"""בדיקות ל-bookmark helpers ב-docx_exporter. + +הבדיקות מתרכזות ב-helper functions בלבד (לא בכל ה-export flow שדורש DB). +""" + +from __future__ import annotations + +import zipfile +from pathlib import Path + +from docx import Document +from lxml import etree + +from legal_mcp.services.docx_exporter import ( + _BOOKMARK_ID_START, + _insert_bookmark_end, + _insert_bookmark_start, + _wrap_block_with_bookmarks, +) +from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks + + +def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None: + doc = Document() + p = doc.add_paragraph("תוכן בלוק י") + _insert_bookmark_start(p, "block-yod", 10001) + _insert_bookmark_end(p, 10001) + + out = tmp_path / "out.docx" + doc.save(str(out)) + + # Verify via list_bookmarks (uses the same XML) + assert list_bookmarks(out) == ["block-yod"] + + +def test_wrap_block_with_bookmarks_wraps_multiple_paragraphs(tmp_path: Path) -> None: + doc = Document() + doc.add_paragraph("ראשון — לפני") # noise before + + bm_counter = [_BOOKMARK_ID_START] + + def writer() -> None: + doc.add_paragraph("בלוק — פסקה 1") + doc.add_paragraph("בלוק — פסקה 2") + doc.add_paragraph("בלוק — פסקה 3") + + _wrap_block_with_bookmarks(doc, "block-yod", writer, bm_counter) + doc.add_paragraph("אחרי — אחרון") # noise after + + out = tmp_path / "out.docx" + doc.save(str(out)) + + # The bookmark should wrap exactly the 3 middle paragraphs + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + + paragraphs = tree.findall(".//w:p", NSMAP) + # Find para index of bookmarkStart and bookmarkEnd + start_idx 
= end_idx = None + for i, p in enumerate(paragraphs): + if p.find(".//w:bookmarkStart", NSMAP) is not None: + start_idx = i + if p.find(".//w:bookmarkEnd", NSMAP) is not None: + end_idx = i + assert start_idx is not None + assert end_idx is not None + # The paragraph containing start must be the first new one ("פסקה 1") + start_text = "".join(paragraphs[start_idx].itertext()) + end_text = "".join(paragraphs[end_idx].itertext()) + assert "פסקה 1" in start_text + assert "פסקה 3" in end_text + + +def test_wrap_block_skipped_when_writer_adds_nothing(tmp_path: Path) -> None: + doc = Document() + bm_counter = [_BOOKMARK_ID_START] + _wrap_block_with_bookmarks(doc, "block-empty", lambda: None, bm_counter) + out = tmp_path / "out.docx" + doc.save(str(out)) + assert list_bookmarks(out) == [] + + +def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None: + doc = Document() + bm_counter = [_BOOKMARK_ID_START] + for name in ("block-alef", "block-bet", "block-gimel"): + _wrap_block_with_bookmarks( + doc, name, + lambda n=name: doc.add_paragraph(f"תוכן של {n}"), + bm_counter, + ) + out = tmp_path / "out.docx" + doc.save(str(out)) + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + + ids = [el.get(_w("id")) for el in tree.iterfind(".//w:bookmarkStart", NSMAP)] + assert len(ids) == 3 + assert len(set(ids)) == 3 + + names = list_bookmarks(out) + assert set(names) == {"block-alef", "block-bet", "block-gimel"} diff --git a/mcp-server/tests/test_docx_retrofit.py b/mcp-server/tests/test_docx_retrofit.py new file mode 100644 index 0000000..40f30bd --- /dev/null +++ b/mcp-server/tests/test_docx_retrofit.py @@ -0,0 +1,141 @@ +"""בדיקות docx_retrofit — הזרקת bookmarks רטרואקטיבית.""" + +from __future__ import annotations + +from pathlib import Path + +from docx import Document + +from legal_mcp.services.docx_retrofit import ( + BLOCK_ORDER, + retrofit_bookmarks, +) +from legal_mcp.services.docx_reviser import list_bookmarks + + 
+def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None: + """Create a DOCX where each paragraph starts with a Hebrew block marker.""" + doc = Document() + for marker in markers: + doc.add_paragraph(f"{marker}. תוכן הבלוק שמתחיל ב-{marker}") + doc.add_paragraph(f"עוד פסקה בבלוק {marker}") + doc.save(str(path)) + + +def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None: + src = tmp_path / "src.docx" + _make_docx_with_hebrew_blocks( + src, ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"], + ) + + result = retrofit_bookmarks(src, backup=False) + assert len(result["bookmarks_added"]) == 12 + assert result["missing_blocks"] == [] + + names = list_bookmarks(src) + expected = {name for name, _ in BLOCK_ORDER} + assert set(names) == expected + + +def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None: + src = tmp_path / "src.docx" + # Only 4 blocks present + _make_docx_with_hebrew_blocks(src, ["א", "ב", "ג", "ד"]) + + result = retrofit_bookmarks(src, backup=False) + assert result["bookmarks_added"] == [ + "block-alef", "block-bet", "block-gimel", "block-dalet", + ] + assert "block-heh" in result["missing_blocks"] + assert "block-yod-bet" in result["missing_blocks"] + + +def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None: + """י, יא, יב must all be distinguished — longer markers win.""" + src = tmp_path / "src.docx" + _make_docx_with_hebrew_blocks(src, ["ט", "י", "יא", "יב"]) + + result = retrofit_bookmarks(src, backup=False) + assert set(result["bookmarks_added"]) == { + "block-tet", "block-yod", "block-yod-alef", "block-yod-bet", + } + + +def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None: + """Running retrofit twice doesn't duplicate bookmarks.""" + src = tmp_path / "src.docx" + _make_docx_with_hebrew_blocks(src, ["א", "ב"]) + + first = retrofit_bookmarks(src, backup=False) + assert first["bookmarks_added"] == ["block-alef", "block-bet"] + + second = 
retrofit_bookmarks(src, backup=False) + assert second["bookmarks_added"] == [] # nothing new + assert set(second["existing_bookmarks"]) == {"block-alef", "block-bet"} + + # Final document should still have exactly 2 bookmarks + assert set(list_bookmarks(src)) == {"block-alef", "block-bet"} + + +def test_retrofit_creates_backup(tmp_path: Path) -> None: + src = tmp_path / "file.docx" + _make_docx_with_hebrew_blocks(src, ["א", "ב"]) + retrofit_bookmarks(src) # backup=True (default) + backup = src.with_suffix(".pre-retrofit.docx") + assert backup.exists() + + +def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None: + src = tmp_path / "src.docx" + out = tmp_path / "out.docx" + _make_docx_with_hebrew_blocks(src, ["א", "ב"]) + retrofit_bookmarks(src, output_path=out) + # source untouched + assert list_bookmarks(src) == [] + # output has bookmarks + assert set(list_bookmarks(out)) == {"block-alef", "block-bet"} + + +def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None: + """A lone 'י' inside body text (not at start) should not be detected as block.""" + src = tmp_path / "src.docx" + doc = Document() + doc.add_paragraph("א. תחילת הבלוק") + doc.add_paragraph("טקסט עם האות י לא בתחילת שורה, זה לא בלוק.") + doc.add_paragraph("ב. בלוק שני") + doc.save(str(src)) + + result = retrofit_bookmarks(src, backup=False) + assert "block-alef" in result["bookmarks_added"] + assert "block-bet" in result["bookmarks_added"] + # 'block-yod' should NOT be detected + assert "block-yod" not in result["bookmarks_added"] + + +def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None: + """If a later-ordered marker appears first, earlier ones are treated as missing. + + Scanner advances forward through BLOCK_ORDER — it won't go back to claim + an earlier marker after already seeing a later one. + """ + src = tmp_path / "src.docx" + doc = Document() + doc.add_paragraph("ב. מופיע ראשון") + doc.add_paragraph("א. 
מופיע אחרי — יידחה כי 'א' לפני 'ב'") + doc.add_paragraph("ג. בלוק גימל") + doc.save(str(src)) + + result = retrofit_bookmarks(src, backup=False) + assert "block-bet" in result["bookmarks_added"] + assert "block-gimel" in result["bookmarks_added"] + # 'א' was not detected (the first paragraph was 'ב' — scanner advanced past א) + assert "block-alef" in result["missing_blocks"] + + +def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None: + src = tmp_path / "empty.docx" + doc = Document() + doc.save(str(src)) + result = retrofit_bookmarks(src, backup=False) + assert result["bookmarks_added"] == [] + assert len(result["missing_blocks"]) == 12 diff --git a/mcp-server/tests/test_docx_reviser.py b/mcp-server/tests/test_docx_reviser.py new file mode 100644 index 0000000..aa478e6 --- /dev/null +++ b/mcp-server/tests/test_docx_reviser.py @@ -0,0 +1,342 @@ +"""בדיקות docx_reviser — Track Changes XML surgery. + +הבדיקות יוצרות DOCX בסיסי עם bookmarks, מפעילות revisions, ובודקות: +1. שה-XML שנוצר תקף ונטען חזרה כ-Document +2. שה- / קיימים בפורמט הנכון +3. שה-bookmarks נשמרים אחרי עריכה +4. שגופן David ו-RTL נשמרים +5. 
שכשלונות מטופלים אלגנטית (bookmark חסר → failed, לא crash) +""" + +from __future__ import annotations + +import zipfile +from datetime import datetime, timezone +from io import BytesIO +from pathlib import Path + +import pytest +from docx import Document +from docx.oxml import OxmlElement +from docx.oxml.ns import qn +from lxml import etree + +from legal_mcp.services import docx_reviser +from legal_mcp.services.docx_reviser import ( + NSMAP, + Revision, + _w, + apply_tracked_revisions, + list_bookmarks, +) + + +# ── Test fixtures ────────────────────────────────────────────────── + + +def _insert_bookmark(paragraph, name: str, bm_id: int) -> None: + """Insert a at the start of a paragraph and a + at the end.""" + p_elem = paragraph._p + + start = OxmlElement("w:bookmarkStart") + start.set(qn("w:id"), str(bm_id)) + start.set(qn("w:name"), name) + p_elem.insert(0, start) + + end = OxmlElement("w:bookmarkEnd") + end.set(qn("w:id"), str(bm_id)) + p_elem.append(end) + + +def _make_sample_docx(path: Path) -> None: + """Create a simple DOCX with 3 paragraphs, each with a bookmark.""" + doc = Document() + for idx, name in enumerate(("block-alef", "block-yod", "block-yod-bet")): + p = doc.add_paragraph() + run = p.add_run(f"תוכן פסקה של {name}") + run.font.name = "David" + _insert_bookmark(p, name, idx + 1) + doc.save(str(path)) + + +@pytest.fixture +def sample_docx(tmp_path: Path) -> Path: + path = tmp_path / "source.docx" + _make_sample_docx(path) + return path + + +# ── list_bookmarks ──────────────────────────────────────────────── + + +def test_list_bookmarks_returns_all_named(sample_docx: Path) -> None: + names = list_bookmarks(sample_docx) + assert set(names) == {"block-alef", "block-yod", "block-yod-bet"} + + +def test_list_bookmarks_excludes_internal(tmp_path: Path) -> None: + """Bookmarks starting with '_' (like _GoBack) should be filtered out.""" + path = tmp_path / "internal.docx" + doc = Document() + p1 = doc.add_paragraph("visible") + _insert_bookmark(p1, 
"block-real", 1) + p2 = doc.add_paragraph("hidden") + _insert_bookmark(p2, "_GoBack", 2) + doc.save(str(path)) + + names = list_bookmarks(path) + assert names == ["block-real"] + + +# ── apply_tracked_revisions: insert_after ───────────────────────── + + +def test_insert_after_adds_tracked_paragraph(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision( + id="r1", + type="insert_after", + anchor_bookmark="block-yod", + content="פסקה חדשה שהמערכת מוסיפה.", + ) + result = apply_tracked_revisions( + sample_docx, out, [rev], + author="מערכת AI", + date=datetime(2026, 4, 16, 14, 0, tzinfo=timezone.utc), + ) + assert result.applied == 1 + assert result.failed == 0 + assert out.exists() + + # Verify present in document.xml + with zipfile.ZipFile(out, "r") as zf: + doc_xml = zf.read("word/document.xml") + tree = etree.fromstring(doc_xml) + ins_elements = tree.findall(".//w:ins", NSMAP) + assert len(ins_elements) >= 1 + # Verify the content is there + all_text = "".join(tree.itertext()) + assert "פסקה חדשה שהמערכת מוסיפה." 
in all_text + # Verify original content preserved + assert "תוכן פסקה של block-yod" in all_text + + +def _find_ins_with_runs(tree: etree._Element) -> etree._Element | None: + """Pick the that actually wraps runs (not the pilcrow-marker one).""" + for ins in tree.iterfind(".//w:ins", NSMAP): + if ins.find(".//w:r", NSMAP) is not None: + return ins + return None + + +def test_insert_after_ins_has_author_and_date(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="insert_after", + anchor_bookmark="block-alef", content="test") + apply_tracked_revisions(sample_docx, out, [rev], author="דפנה") + + with zipfile.ZipFile(out, "r") as zf: + doc_xml = zf.read("word/document.xml") + tree = etree.fromstring(doc_xml) + ins = _find_ins_with_runs(tree) + assert ins is not None + assert ins.get(_w("author")) == "דפנה" + date_str = ins.get(_w("date")) + assert date_str is not None + assert date_str.endswith("Z") # ISO 8601 UTC + + +def test_insert_after_uses_rtl_and_david(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="insert_after", + anchor_bookmark="block-alef", content="מוסף") + apply_tracked_revisions(sample_docx, out, [rev]) + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + + ins = _find_ins_with_runs(tree) + assert ins is not None + run = ins.find(".//w:r", NSMAP) + assert run is not None + rPr = run.find(_w("rPr")) + assert rPr is not None + assert rPr.find(_w("rtl")) is not None + rFonts = rPr.find(_w("rFonts")) + assert rFonts is not None + assert rFonts.get(_w("ascii")) == "David" + + +# ── apply_tracked_revisions: insert_before ──────────────────────── + + +def test_insert_before_places_above_anchor(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="insert_before", + anchor_bookmark="block-yod", content="לפני י.") + result = 
apply_tracked_revisions(sample_docx, out, [rev]) + assert result.applied == 1 + + # Order check: new paragraph's text must appear before "block-yod" + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + paragraphs = tree.findall(".//w:p", NSMAP) + texts = ["".join(p.itertext()) for p in paragraphs] + idx_new = next(i for i, t in enumerate(texts) if "לפני י." in t) + idx_yod = next(i for i, t in enumerate(texts) if "תוכן פסקה של block-yod" in t) + assert idx_new < idx_yod + + +# ── apply_tracked_revisions: delete ─────────────────────────────── + + +def test_delete_wraps_runs_in_w_del(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="delete", anchor_bookmark="block-yod", content="") + result = apply_tracked_revisions(sample_docx, out, [rev]) + assert result.applied == 1 + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + dels = tree.findall(".//w:del", NSMAP) + assert len(dels) >= 1 + # Inside w:del, text elements must become w:delText + del_texts = dels[0].findall(".//w:delText", NSMAP) + assert any("block-yod" in (t.text or "") for t in del_texts) + + +# ── apply_tracked_revisions: replace ───────────────────────────── + + +def test_replace_creates_both_ins_and_del(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="replace", + anchor_bookmark="block-yod", content="תוכן חדש לחלוטין") + result = apply_tracked_revisions(sample_docx, out, [rev]) + assert result.applied == 1 + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + assert len(tree.findall(".//w:ins", NSMAP)) >= 1 + assert len(tree.findall(".//w:del", NSMAP)) >= 1 + + +# ── Failure modes ───────────────────────────────────────────────── + + +def test_missing_bookmark_returns_failed_not_crash( + sample_docx: Path, tmp_path: Path, +) -> None: + out = tmp_path / 
"out.docx" + rev = Revision(id="r1", type="insert_after", + anchor_bookmark="does-not-exist", content="x") + result = apply_tracked_revisions(sample_docx, out, [rev]) + assert result.applied == 0 + assert result.failed == 1 + assert result.results[0].status == "failed" + assert "not found" in (result.results[0].error or "") + # Output file still produced (unchanged copy) + assert out.exists() + + +def test_empty_revisions_list_produces_copy(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + result = apply_tracked_revisions(sample_docx, out, []) + assert result.applied == 0 + assert result.failed == 0 + assert out.exists() + # bookmarks should still be there + assert set(list_bookmarks(out)) == {"block-alef", "block-yod", "block-yod-bet"} + + +# ── Track revisions flag in settings ────────────────────────────── + + +def test_track_revisions_flag_is_enabled(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="insert_after", + anchor_bookmark="block-alef", content="x") + apply_tracked_revisions(sample_docx, out, [rev]) + + with zipfile.ZipFile(out, "r") as zf: + settings_xml = zf.read("word/settings.xml") + settings_tree = etree.fromstring(settings_xml) + tr = settings_tree.find(_w("trackRevisions")) + assert tr is not None + + +# ── Multiple revisions with unique IDs ──────────────────────────── + + +def test_multiple_revisions_get_unique_ids(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + revs = [ + Revision(id="r1", type="insert_after", + anchor_bookmark="block-alef", content="ראשון"), + Revision(id="r2", type="insert_after", + anchor_bookmark="block-yod", content="שני"), + Revision(id="r3", type="delete", anchor_bookmark="block-yod-bet"), + ] + result = apply_tracked_revisions(sample_docx, out, revs) + assert result.applied == 3 + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + all_ids: list[str] = [] + for 
xpath in (".//w:ins", ".//w:del"): + for el in tree.iterfind(xpath, NSMAP): + wid = el.get(_w("id")) + if wid: + all_ids.append(wid) + assert len(all_ids) == len(set(all_ids)), f"duplicate IDs: {all_ids}" + + +# ── DOCX remains openable as Document ───────────────────────────── + + +def test_output_docx_is_openable_by_python_docx( + sample_docx: Path, tmp_path: Path, +) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="insert_after", + anchor_bookmark="block-yod", content="תוכן חדש") + apply_tracked_revisions(sample_docx, out, [rev]) + # Must be openable as a valid DOCX by python-docx (no exceptions) + doc = Document(str(out)) + # Original text is still accessible via python-docx + all_text = "\n".join(p.text for p in doc.paragraphs) + assert "block-yod" in all_text + + # Inserted (tracked) text is present in the raw XML via itertext + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + raw_text = "".join(tree.itertext()) + assert "תוכן חדש" in raw_text + + +# ── Bookmarks preserved through revisions ───────────────────────── + + +def test_bookmarks_preserved_after_insert(sample_docx: Path, tmp_path: Path) -> None: + out = tmp_path / "out.docx" + rev = Revision(id="r1", type="insert_after", + anchor_bookmark="block-yod", content="x") + apply_tracked_revisions(sample_docx, out, [rev]) + names = list_bookmarks(out) + assert set(names) == {"block-alef", "block-yod", "block-yod-bet"} + + +# ── Idempotency of loading/saving without changes ──────────────── + + +def test_save_without_revisions_preserves_content( + sample_docx: Path, tmp_path: Path, +) -> None: + out = tmp_path / "out.docx" + apply_tracked_revisions(sample_docx, out, []) + doc_orig = Document(str(sample_docx)) + doc_new = Document(str(out)) + orig_text = [p.text for p in doc_orig.paragraphs] + new_text = [p.text for p in doc_new.paragraphs] + assert orig_text == new_text diff --git a/mcp-server/tests/test_track_changes_e2e.py 
b/mcp-server/tests/test_track_changes_e2e.py new file mode 100644 index 0000000..6f5dff2 --- /dev/null +++ b/mcp-server/tests/test_track_changes_e2e.py @@ -0,0 +1,237 @@ +"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser. + +הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות +שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks +מובנים) ועל מסמכים רגילים שעברו retrofit. +""" + +from __future__ import annotations + +import zipfile +from datetime import datetime, timezone +from pathlib import Path + +import pytest +from docx import Document +from docx.oxml import OxmlElement +from docx.oxml.ns import qn +from lxml import etree + +from legal_mcp.services import docx_retrofit, docx_reviser +from legal_mcp.services.docx_exporter import ( + _BOOKMARK_ID_START, + _wrap_block_with_bookmarks, +) +from legal_mcp.services.docx_reviser import ( + NSMAP, + Revision, + _w, + apply_tracked_revisions, + list_bookmarks, +) + + +# ── Helpers ──────────────────────────────────────────────────────── + + +def _make_exporter_style_docx(path: Path) -> None: + """Simulate what docx_exporter produces: paragraphs wrapped in bookmarks + for each of the 12 blocks, with David font and RTL.""" + doc = Document() + bm_counter = [_BOOKMARK_ID_START] + + blocks = [ + ("block-alef", "בפני: דפנה תמיר, יו\"ר ועדת הערר"), + ("block-bet", "ערר מספר 1033-25"), + ("block-heh", "רקע\nהנכס מצוי ברחוב הר בשן"), + ("block-yod", "דיון והכרעה\nלאחר שבחנו את טענות הצדדים"), + ("block-yod-bet", "ההחלטה\nהערר מתקבל בחלקו"), + ] + + for name, content in blocks: + def writer(c=content): + for line in c.split("\n"): + if line.strip(): + doc.add_paragraph(line.strip()) + _wrap_block_with_bookmarks(doc, name, writer, bm_counter) + + doc.save(str(path)) + + +def _make_user_edited_docx(path: Path) -> None: + """Simulate what a user produces by editing in Word: no bookmarks, + heading-style paragraphs in Daphna style.""" + doc = Document() + for text in [ + "בפני: 
דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים", + "ערר מספר 9999-25", + "רקע", + "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים", + "תמצית טענות הצדדים", + "העוררים טוענים שהבנייה חורגת מהתכנית", + "תגובת המשיבה", + "הוועדה המקומית טוענת שהבקשה תואמת", + "ההליכים בפני ועדת הערר", + "קיימנו דיון בנוכחות הצדדים", + "דיון והכרעה", + "לאחר שבחנו את טענות הצדדים בחון מעמיק", + "סוף דבר", + "הערר נדחה", + ]: + doc.add_paragraph(text) + doc.save(str(path)) + + +# ── Exporter-style (built-in bookmarks) ────────────────────────── + + +def test_exporter_output_works_with_reviser(tmp_path: Path) -> None: + src = tmp_path / "exported.docx" + _make_exporter_style_docx(src) + + # All 5 bookmarks should be present directly from "export" + bookmarks = list_bookmarks(src) + assert set(bookmarks) >= {"block-alef", "block-bet", "block-heh", + "block-yod", "block-yod-bet"} + + out = tmp_path / "revised.docx" + revs = [ + Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", + content="תוספת מערכת: פסק הלכה חדש", style="body"), + ] + result = apply_tracked_revisions(src, out, revs) + assert result.applied == 1 + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + raw_text = "".join(tree.itertext()) + assert "תוספת מערכת" in raw_text + # The revision is tracked (inside ) + ins_list = tree.findall(".//w:ins", NSMAP) + assert any("תוספת מערכת" in "".join(el.itertext()) for el in ins_list) + + +# ── User-edited DOCX (no bookmarks) — needs retrofit first ────── + + +def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None: + user_file = tmp_path / "user_edit.docx" + _make_user_edited_docx(user_file) + + # Initially no named bookmarks + assert list_bookmarks(user_file) == [] + + # Retrofit — should detect blocks via heading heuristic + result = docx_retrofit.retrofit_bookmarks(user_file, backup=False) + added = set(result["bookmarks_added"]) + # Must include at least block-yod (for common "insert pasak halacha" task) + assert 
"block-yod" in added + # Plus block-heh (רקע) and block-zayin (תמצית טענות) + assert "block-heh" in added + assert "block-zayin" in added + + # Now apply a revision on the retrofitted file + out = tmp_path / "revised.docx" + revs = [Revision(id="r1", type="insert_after", + anchor_bookmark="block-yod", + content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...", + style="body")] + rr = apply_tracked_revisions(user_file, out, revs) + assert rr.applied == 1 + + # Verify output has the insertion inside + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + ins_texts = ["".join(el.itertext()) for el in tree.iterfind(".//w:ins", NSMAP)] + assert any("פסק הלכה שהוסף" in t for t in ins_texts) + + +def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None: + user_file = tmp_path / "user.docx" + _make_user_edited_docx(user_file) + + before_doc = Document(str(user_file)) + before_texts = [p.text for p in before_doc.paragraphs] + + docx_retrofit.retrofit_bookmarks(user_file, backup=False) + + after_doc = Document(str(user_file)) + after_texts = [p.text for p in after_doc.paragraphs] + # Paragraph texts should be identical (we only added bookmark markers) + assert before_texts == after_texts + + +def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None: + """Running retrofit twice + revising should still produce valid output.""" + user_file = tmp_path / "user.docx" + _make_user_edited_docx(user_file) + + # First retrofit + r1 = docx_retrofit.retrofit_bookmarks(user_file, backup=False) + # Second retrofit — should add no new bookmarks + r2 = docx_retrofit.retrofit_bookmarks(user_file, backup=False) + assert r2["bookmarks_added"] == [] + assert set(r2["existing_bookmarks"]) >= set(r1["bookmarks_added"]) + + # Then revise works normally + out = tmp_path / "revised.docx" + revs = [Revision(id="r1", type="insert_after", + anchor_bookmark="block-yod", content="x")] + result = apply_tracked_revisions(user_file, out, revs) + assert 
result.applied == 1 + + +def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None: + """Verify multiple tracked changes each get independent ins ids so + user can Accept/Reject each one separately in Word.""" + user_file = tmp_path / "user.docx" + _make_user_edited_docx(user_file) + docx_retrofit.retrofit_bookmarks(user_file, backup=False) + + out = tmp_path / "revised.docx" + revs = [ + Revision(id="r1", type="insert_after", + anchor_bookmark="block-heh", content="תוספת 1"), + Revision(id="r2", type="insert_after", + anchor_bookmark="block-yod", content="תוספת 2"), + Revision(id="r3", type="insert_before", + anchor_bookmark="block-yod-alef", content="תוספת 3"), + ] + result = apply_tracked_revisions(user_file, out, revs) + assert result.applied == 3 + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + ins_ids = {el.get(_w("id")) for el in tree.iterfind(".//w:ins", NSMAP)} + assert len(ins_ids) >= 3 # at least one unique id per revision + + +def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None: + """Inserted paragraph must have bidi + rtl + David font so it renders + correctly in Word alongside the user's content.""" + user_file = tmp_path / "user.docx" + _make_user_edited_docx(user_file) + docx_retrofit.retrofit_bookmarks(user_file, backup=False) + + out = tmp_path / "out.docx" + revs = [Revision(id="r1", type="insert_after", + anchor_bookmark="block-yod", content="עברית RTL")] + apply_tracked_revisions(user_file, out, revs) + + with zipfile.ZipFile(out, "r") as zf: + tree = etree.fromstring(zf.read("word/document.xml")) + + # Find the ins that holds runs + for ins in tree.iterfind(".//w:ins", NSMAP): + runs = ins.findall(".//w:r", NSMAP) + for r in runs: + text_els = r.findall(".//w:t", NSMAP) + if any("עברית RTL" in (t.text or "") for t in text_els): + rPr = r.find(_w("rPr")) + assert rPr is not None + assert rPr.find(_w("rtl")) is not None + rFonts = rPr.find(_w("rFonts")) + 
assert rFonts is not None + assert rFonts.get(_w("ascii")) == "David" + return + pytest.fail("tracked insertion with 'עברית RTL' not found") diff --git a/scripts/SCRIPTS.md b/scripts/SCRIPTS.md index 185efa9..629ca13 100644 --- a/scripts/SCRIPTS.md +++ b/scripts/SCRIPTS.md @@ -13,6 +13,9 @@ | `restore-db.sh` | bash | שחזור DB מגיבוי (companion ל-backup-db.sh) | ידני | | `notify.py` | python | שליחת מייל התראה מסוכנים via SMTP (Gmail) | נקרא ע"י סוכנים | | `bidi_table.py` | python | יצירת טבלאות box-drawing עם תמיכה ב-BiDi (עברית+אנגלית) | ספריית עזר | +| `convert_decision_template.py` | python | המרת `data/training/טיוטת החלטה.dotx` → `skills/docx/decision_template.docx` לטעינה ב-python-docx | להריץ כשמתעדכנת התבנית | +| `deploy-track-changes.sh` | bash | סנכרון skills CMP↔CMPA + בדיקות + הנחיות deploy לארכיטקטורת Track Changes | ידני | +| `retrofit_case.py` | python | retrofit רטרואקטיבי — מזריק bookmarks לקובץ קיים של תיק ספציפי ומגדיר אותו כ-active_draft | ידני (חד-פעמי לתיק) | ## תיקיית `.archive/` — סקריפטים שהושלמו diff --git a/scripts/deploy-track-changes.sh b/scripts/deploy-track-changes.sh new file mode 100755 index 0000000..237f5d9 --- /dev/null +++ b/scripts/deploy-track-changes.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# deploy-track-changes.sh — פריסת ארכיטקטורת Track Changes לשתי חברות (CMP + CMPA) +# +# מה זה עושה: +# 1. מוודא ש-skills קיימים ומסונכרנים בשתי החברות +# 2. git commit + push (אם יש שינויים) +# 3. הודעה להפעלת Coolify deploy +# +# שימוש: +# scripts/deploy-track-changes.sh + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +CMP_DIR="/home/chaim/.paperclip/instances/default/skills/42a7acd0-30c5-4cbd-ac97-7424f65df294" +CMPA_DIR="/home/chaim/.paperclip/instances/default/skills/8639e837-4c9d-47fa-a76b-95788d651896" +COOLIFY_UUID="gyjo0mtw2c42ej3xxvbz8zio" + +echo "▶ שלב 1: סנכרון skills בין CMP ל-CMPA" + +SKILLS=(legal-docx attach-precedents review-analysis writer-readiness + appendix-expert-intern bidi-table-rtl revise-draft) + +mkdir -p "$CMPA_DIR" +for skill in "${SKILLS[@]}"; do + if [ ! -d "$CMP_DIR/$skill" ]; then + echo " ⚠ skill לא קיים ב-CMP: $skill — דילוג" + continue + fi + if [ -d "$CMPA_DIR/$skill" ]; then + # Update only — don't delete any CMPA-specific files + rsync -av --update "$CMP_DIR/$skill/" "$CMPA_DIR/$skill/" > /dev/null + echo " ✓ $skill (עודכן ב-CMPA)" + else + cp -r "$CMP_DIR/$skill" "$CMPA_DIR/$skill" + echo " ✓ $skill (הועתק ל-CMPA)" + fi +done + +echo "" +echo "▶ שלב 2: בדיקת פיתוח אחרונה" + +cd "$REPO_ROOT" + +# Run mcp-server tests +if [ -f mcp-server/.venv/bin/pytest ]; then + echo " מריץ pytest..." + (cd mcp-server && .venv/bin/pytest tests/ -q 2>&1 | tail -5) || { + echo " ✗ בדיקות נכשלו — עצירה" + exit 1 + } + echo " ✓ כל הבדיקות עברו" +fi + +# Run TypeScript check +if [ -d web-ui/node_modules ]; then + echo " מריץ tsc..." 
+ (cd web-ui && npx tsc --noEmit 2>&1 | head -10) || { + echo " ✗ שגיאות TypeScript — עצירה" + exit 1 + } + echo " ✓ TypeScript נקי" +fi + +echo "" +echo "▶ שלב 3: סטטוס git" + +if [ -n "$(git status --porcelain)" ]; then + echo " יש שינויים ב-git — לא מבצע commit אוטומטי (ריצו ידנית)" + git status --short + echo "" + echo " הפקודה להרצה:" + echo " git add -A" + echo " git commit -m \"Add Track Changes support for draft revisions (CMP + CMPA)\"" + echo " git push origin main" +else + echo " ✓ אין שינויים לא שמורים" +fi + +echo "" +echo "▶ שלב 4: Coolify deploy" +echo " לאחר push, הריצו:" +echo " mcp__coolify__deploy עם UUID=$COOLIFY_UUID" +echo " או דרך UI: https://coolify.nautilus.marcusgroup.org" +echo "" +echo "✓ הסקריפט הסתיים" diff --git a/scripts/retrofit_case.py b/scripts/retrofit_case.py new file mode 100755 index 0000000..426e712 --- /dev/null +++ b/scripts/retrofit_case.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +"""retrofit_case.py — הזרקת bookmarks רטרואקטיבית לקובץ קיים בתיק. + +שימוש: + python scripts/retrofit_case.py + +דוגמה: + python scripts/retrofit_case.py 1033-25 עריכה-v1.docx + +פעולה: + 1. מזהה את הקובץ ב-data/cases/{case_number}/exports/ + 2. מזריק bookmarks ב-12 הבלוקים (heuristic) + 3. שומר backup כ-{filename}.pre-retrofit.docx + 4. מדפיס summary — אילו בלוקים זוהו, אילו חסרים + +לתיק 1033-25 — הריצו פעם אחת על עריכה-v1.docx הקיים. אחרי זה תוכלו +להריץ revise_draft דרך ה-CEO. + +הערה: השירות הזה נקרא גם אוטומטית דרך apply_user_edit tool ב-MCP, +אז אחרי deploy אין צורך להריץ ידנית. זה לגיבוי/ניפוי. 
+""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +# Make mcp-server importable when run from repo root +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT / "mcp-server" / "src")) + + +def main() -> int: + if len(sys.argv) != 3: + print(__doc__) + return 2 + + case_number = sys.argv[1] + filename = sys.argv[2] + + from legal_mcp.services import docx_retrofit, docx_reviser + + case_dir = REPO_ROOT / "data" / "cases" / case_number / "exports" + file_path = case_dir / filename + + if not file_path.exists(): + print(f"✗ קובץ לא נמצא: {file_path}", file=sys.stderr) + return 1 + + print(f"מעבד: {file_path}") + print(f" גודל: {file_path.stat().st_size:,} בייט") + + # Existing bookmarks + before = docx_reviser.list_bookmarks(file_path) + print(f" bookmarks קיימים: {before or '(ריק)'}") + + result = docx_retrofit.retrofit_bookmarks(file_path) + print() + print("תוצאה:") + print(json.dumps(result, ensure_ascii=False, indent=2)) + + # Verify post-state + after = docx_reviser.list_bookmarks(file_path) + print() + print(f"bookmarks אחרי: {len(after)} — {after}") + + backup = file_path.with_suffix(".pre-retrofit.docx") + if backup.exists(): + print(f"גיבוי נשמר: {backup}") + + # Build an MCP-callable invocation hint + rel = file_path.relative_to(REPO_ROOT) + print() + print("השלב הבא: לעדכן active_draft_path ב-DB. 
הפקודה:") + print(f' mcp__legal-ai__apply_user_edit case_number="{case_number}" ' + f'edit_filename="{filename}"') + print() + print(f"(זה ירוץ retrofit שוב idempotent ואז יעדכן את DB)") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/web-ui/src/components/cases/drafts-panel.tsx b/web-ui/src/components/cases/drafts-panel.tsx index a99465a..5433dae 100644 --- a/web-ui/src/components/cases/drafts-panel.tsx +++ b/web-ui/src/components/cases/drafts-panel.tsx @@ -18,6 +18,7 @@ import { useUploadDraft, useMarkFinal, useDeleteDraft, + useActiveDraft, } from "@/lib/api/exports"; import { useCaseFeedback, @@ -78,6 +79,7 @@ export function DraftsPanel({ const { data: exports, isLoading: exportsLoading } = useExports(caseNumber); const { data: feedbacks, isLoading: feedbackLoading } = useCaseFeedback(caseNumber); + const { data: activeDraft } = useActiveDraft(caseNumber); const exportDocx = useExportDocx(caseNumber); const uploadDraft = useUploadDraft(caseNumber); const markFinal = useMarkFinal(caseNumber); @@ -90,25 +92,44 @@ export function DraftsPanel({ const isDraftReady = status && DRAFT_READY.includes(status); const openFeedbacks = feedbacks?.filter((f) => !f.resolved) ?? []; - // Determine draft label based on exports — revised if there are עריכה files or multiple טיוטה versions + // Determine draft label based on *actual* v-numbers in filenames (not counts). + // "(מתוקנת)" suffix appears when there's at least one עריכה-* file. 
const draftLabel = (() => { if (!exports?.length) return "טיוטה מוכנה לעיון"; - const revisions = exports.filter((f) => f.filename.startsWith("עריכה-")); const drafts = exports.filter((f) => f.filename.startsWith("טיוטה-")); - if (revisions.length > 0) { - const ver = revisions.length + 1; - return `טיוטה ${ver} (מתוקנת) מוכנה לעיון`; - } - if (drafts.length > 1) { - return `טיוטה ${drafts.length} מוכנה לעיון`; - } - return "טיוטה ראשונה מוכנה לעיון"; + const revisions = exports.filter((f) => f.filename.startsWith("עריכה-")); + if (!drafts.length) return "טיוטה מוכנה לעיון"; + const versions = drafts + .map((f) => { + const m = f.filename.match(/v(\d+)/); + return m ? parseInt(m[1], 10) : 0; + }) + .filter((n) => n > 0); + const maxVer = versions.length ? Math.max(...versions) : drafts.length; + const suffix = revisions.length > 0 ? " (מתוקנת)" : ""; + return `טיוטה v${maxVer}${suffix} מוכנה לעיון`; })(); function handleUpload(file: File) { uploadDraft.mutate(file, { - onSuccess: (data) => - toast.success(`הועלה: ${data.filename}`), + onSuccess: (data) => { + const added = data.bookmarks_added?.length ?? 0; + const missing = data.missing_blocks?.length ?? 0; + if (data.apply_status === "completed" || data.apply_status === "ok") { + if (added > 0) { + toast.success(`הועלה: ${data.filename} — זוהו ${added} בלוקים`); + } else { + toast.success(`הועלה: ${data.filename}`); + } + if (missing > 0) { + toast.warning( + `שימו לב: ${missing} בלוקים לא זוהו — ייתכנו בעיות בתיקונים עתידיים`, + ); + } + } else { + toast.error(`הועלה אך השילוב נכשל: ${data.apply_status ?? "שגיאה"}`); + } + }, onError: (err) => toast.error(err instanceof Error ? err.message : "שגיאה בהעלאה"), }); @@ -164,6 +185,16 @@ export function DraftsPanel({ )} + {/* ── Active-draft badge — the DOCX that is the current source of truth ── */} + {activeDraft?.filename && ( +
+ מקור האמת: + + {activeDraft.filename} + +
+ )} + {/* ── Exports list ── */}
diff --git a/web-ui/src/lib/api/exports.ts b/web-ui/src/lib/api/exports.ts index 223e725..a941453 100644 --- a/web-ui/src/lib/api/exports.ts +++ b/web-ui/src/lib/api/exports.ts @@ -13,10 +13,48 @@ export type ExportFile = { is_final: boolean; }; +export type ActiveDraft = { + active_draft_path: string | null; + filename: string | null; + exists: boolean; +}; + +export type Revision = { + id: string; + type: "insert_after" | "insert_before" | "replace" | "delete"; + anchor_bookmark: string; + content?: string; + style?: "body" | "heading" | "quote" | "bold"; + reason?: string; +}; + +export type UploadResult = { + filename: string; + size: number; + version: number; + active_draft?: string; + bookmarks_added?: string[]; + missing_blocks?: string[]; + apply_status?: string; +}; + +export type ReviseResult = { + status: string; + output_path: string; + version: number; + applied: number; + failed: number; + results: { id: string; status: string; error?: string }[]; +}; + export const exportsKeys = { all: ["exports"] as const, list: (caseNumber: string) => [...exportsKeys.all, "list", caseNumber] as const, + activeDraft: (caseNumber: string) => + [...exportsKeys.all, "active-draft", caseNumber] as const, + bookmarks: (caseNumber: string) => + [...exportsKeys.all, "bookmarks", caseNumber] as const, }; export function useExports(caseNumber: string | undefined) { @@ -48,7 +86,7 @@ export function useExportDocx(caseNumber: string) { export function useUploadDraft(caseNumber: string) { const qc = useQueryClient(); return useMutation({ - mutationFn: async (file: File) => { + mutationFn: async (file: File): Promise => { const form = new FormData(); form.append("file", file); const res = await fetch(`/api/cases/${caseNumber}/exports/upload`, { @@ -59,14 +97,70 @@ export function useUploadDraft(caseNumber: string) { const err = await res.json().catch(() => ({ detail: "שגיאה בהעלאה" })); throw new Error(err.detail ?? 
"שגיאה בהעלאה"); } - return res.json() as Promise<{ - filename: string; - size: number; - version: number; - }>; + return res.json() as Promise; }, onSuccess: () => { qc.invalidateQueries({ queryKey: exportsKeys.list(caseNumber) }); + qc.invalidateQueries({ queryKey: exportsKeys.activeDraft(caseNumber) }); + qc.invalidateQueries({ queryKey: exportsKeys.bookmarks(caseNumber) }); + }, + }); +} + +export function useActiveDraft(caseNumber: string | undefined) { + return useQuery({ + queryKey: exportsKeys.activeDraft(caseNumber ?? ""), + queryFn: ({ signal }) => + apiRequest(`/api/cases/${caseNumber}/active-draft`, { signal }), + enabled: Boolean(caseNumber), + staleTime: 5_000, + }); +} + +export function useBookmarks(caseNumber: string | undefined) { + return useQuery({ + queryKey: exportsKeys.bookmarks(caseNumber ?? ""), + queryFn: ({ signal }) => + apiRequest<{ + status: string; + active_draft_path?: string; + bookmarks?: string[]; + }>(`/api/cases/${caseNumber}/exports/bookmarks`, { signal }), + enabled: Boolean(caseNumber), + staleTime: 10_000, + }); +} + +export function useReviseDraft(caseNumber: string) { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (payload: { revisions: Revision[]; author?: string }) => + apiRequest(`/api/cases/${caseNumber}/exports/revise`, { + method: "POST", + body: payload, + }), + onSuccess: () => { + qc.invalidateQueries({ queryKey: exportsKeys.list(caseNumber) }); + qc.invalidateQueries({ queryKey: exportsKeys.activeDraft(caseNumber) }); + }, + }); +} + +export function useRetrofit(caseNumber: string) { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (filename: string) => + apiRequest<{ + status: string; + active_draft_path: string; + bookmarks_added: string[]; + missing_blocks: string[]; + }>(`/api/cases/${caseNumber}/exports/${filename}/retrofit`, { + method: "POST", + }), + onSuccess: () => { + qc.invalidateQueries({ queryKey: exportsKeys.activeDraft(caseNumber) }); + 
qc.invalidateQueries({ queryKey: exportsKeys.bookmarks(caseNumber) }); }, }); } diff --git a/web/app.py b/web/app.py index 1698e66..ededb51 100644 --- a/web/app.py +++ b/web/app.py @@ -1719,6 +1719,24 @@ async def api_research_analysis_download(case_number: str): ) +@app.get("/api/cases/{case_number}/research/analysis/export-docx") +async def api_research_analysis_export_docx(case_number: str): + """Export the legal analysis as a DOCX using דפנה's decision template styles.""" + from legal_mcp.services.analysis_docx_exporter import build_analysis_docx + try: + path = await build_analysis_docx(case_number) + except FileNotFoundError as e: + raise HTTPException(404, str(e)) + except Exception as e: + logger.exception("Failed to export analysis DOCX for %s", case_number) + raise HTTPException(500, f"שגיאה בייצוא: {e}") + return FileResponse( + path, + media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + filename=path.name, + ) + + @app.put("/api/cases/{case_number}/research/analysis/upload") async def api_research_analysis_upload( case_number: str, @@ -1990,7 +2008,12 @@ async def api_delete_export(case_number: str, filename: str): @app.post("/api/cases/{case_number}/exports/upload") async def api_upload_export(case_number: str, file: UploadFile = File(...)): - """Upload a revised version of a draft.""" + """Upload a revised version of a draft. + + After saving, the file is automatically registered as the case's + active_draft (source of truth) and bookmarks are retrofitted so that + future revise_draft calls can anchor Track Changes to the 12 blocks. 
+ """ case = await db.get_case_by_number(case_number) if not case: raise HTTPException(404, f"תיק {case_number} לא נמצא") @@ -2022,10 +2045,85 @@ async def api_upload_export(case_number: str, file: UploadFile = File(...)): dest = export_dir / f"עריכה-v{next_ver}.docx" dest.write_bytes(content) + # Auto-register as active_draft + retrofit bookmarks + auto_result: dict = {"status": "ok"} + try: + raw = await drafting_tools.apply_user_edit(case_number, dest.name) + auto_result = json.loads(raw) + except Exception as e: + auto_result = {"status": "error", "message": str(e)} + return { "filename": dest.name, "size": len(content), "version": next_ver, + "active_draft": auto_result.get("active_draft_path"), + "bookmarks_added": auto_result.get("bookmarks_added", []), + "missing_blocks": auto_result.get("missing_blocks", []), + "apply_status": auto_result.get("status", "error"), + } + + +class ReviseRequest(BaseModel): + revisions: list[dict] + author: str = "מערכת AI" + + +@app.post("/api/cases/{case_number}/exports/revise") +async def api_revise_draft(case_number: str, req: ReviseRequest): + """Apply a batch of Track Changes revisions to the active draft.""" + raw = await drafting_tools.revise_draft( + case_number, + json.dumps(req.revisions, ensure_ascii=False), + req.author, + ) + try: + data = json.loads(raw) + except json.JSONDecodeError: + raise HTTPException(500, raw) + if data.get("status") == "error": + raise HTTPException(400, data.get("message", "revise failed")) + return data + + +@app.get("/api/cases/{case_number}/exports/bookmarks") +async def api_list_bookmarks(case_number: str): + """List bookmarks in the case's active draft (anchors for revisions).""" + raw = await drafting_tools.list_bookmarks(case_number) + try: + data = json.loads(raw) + except json.JSONDecodeError: + raise HTTPException(500, raw) + return data + + +@app.post("/api/cases/{case_number}/exports/{filename}/retrofit") +async def api_retrofit_bookmarks(case_number: str, filename: str): + 
"""Manually trigger retrofit of bookmarks on an existing file.""" + raw = await drafting_tools.apply_user_edit(case_number, filename) + try: + data = json.loads(raw) + except json.JSONDecodeError: + raise HTTPException(500, raw) + if data.get("status") == "error": + raise HTTPException(400, data.get("message", "retrofit failed")) + return data + + +@app.get("/api/cases/{case_number}/active-draft") +async def api_get_active_draft(case_number: str): + """Get the current active_draft_path for a case.""" + case = await db.get_case_by_number(case_number) + if not case: + raise HTTPException(404, f"תיק {case_number} לא נמצא") + path = await db.get_active_draft_path(UUID(case["id"])) + if not path: + return {"active_draft_path": None, "filename": None, "exists": False} + filename = Path(path).name + return { + "active_draft_path": path, + "filename": filename, + "exists": Path(path).exists(), }