Add renumber_all_blocks + fix sequential_numbering check for bold format

block_writer: add renumber_all_blocks(), which renumbers all paragraphs across all blocks sequentially (1, 2, 3...). It handles both the plain "N." and the bold "**N.**" formats. Also added the missing 'import re'. qa_validator: the sequential_numbering check now matches bold-formatted numbers (**N.**) in addition to plain ones (N.). Tested on Hecht: renumbered 115 paragraphs across 7 blocks, QA 6/6.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 12:30:31 +00:00
parent 7781987c3a
commit e438740ab4
2 changed files with 57 additions and 2 deletions
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -14,6 +14,7 @@ from __future__ import annotations

 import json
 import logging
+import re
 from datetime import date
 from uuid import UUID

@@ -580,6 +581,60 @@ async def _build_previous_blocks_context(case_id: UUID, decision: dict | None) -
    return "\n\n".join(parts)


+# ── Renumbering ───────────────────────────────────────────────────
+
+async def renumber_all_blocks(decision_id: UUID) -> dict:
+    """Renumber every numbered paragraph of a decision into one sequence.
+
+    Blocks are visited in ``block_index`` order; line-leading "N.", "**N.**"
+    and "**N**." markers are rewritten 1, 2, 3, ... across all blocks, except
+    in ``skip_blocks`` and empty blocks. The read and all writes share one
+    transaction, so a failure cannot leave the decision half renumbered.
+
+    Returns ``{"total_paragraphs": int, "blocks_updated": int}``.
+    """
+    # Blocks that shouldn't be numbered
+    skip_blocks = {"block-alef", "block-bet", "block-gimel", "block-dalet", "block-yod-bet"}
+    # Optional leading "**", the old number, then the "." (with any bold
+    # marker before it); for "**N.**" the closing "**" stays in the text.
+    # NOTE(review): a line starting with a decimal such as "1.5" also
+    # matches — confirm such lines cannot occur in block content.
+    number_re = re.compile(r'^(\*\*)?(\d+)(\.?\*?\*?\.)', flags=re.MULTILINE)
+    current_num = 1
+    updated = 0
+
+    def replace_num(match: re.Match) -> str:
+        """Swap the matched number for the next one, keeping its markers."""
+        nonlocal current_num
+        prefix = match.group(1) or ""  # leading bold marker, if any
+        suffix = match.group(3) or ""  # "." plus any bold marker before it
+        result = f"{prefix}{current_num}{suffix}"
+        current_num += 1
+        return result
+
+    pool = await db.get_pool()
+    async with pool.acquire() as conn, conn.transaction():
+        rows = await conn.fetch(
+            """SELECT block_id, block_index, content
+               FROM decision_blocks WHERE decision_id = $1
+               ORDER BY block_index""",
+            decision_id,
+        )
+        for row in rows:
+            content = row["content"]
+            if row["block_id"] in skip_blocks or not content:
+                continue
+            new_content = number_re.sub(replace_num, content)
+            if new_content != content:
+                await conn.execute(
+                    "UPDATE decision_blocks SET content = $1, updated_at = now() WHERE decision_id = $2 AND block_id = $3",
+                    new_content, decision_id, row["block_id"],
+                )
+                updated += 1
+
+    return {"total_paragraphs": current_num - 1, "blocks_updated": updated}
+
+
 # ── Store block ───────────────────────────────────────────────────

 async def store_block(decision_id: UUID, block_result: dict) -> None:
--- a/mcp-server/src/legal_mcp/services/qa_validator.py
+++ b/mcp-server/src/legal_mcp/services/qa_validator.py
@@ -279,8 +279,8 @@ def check_sequential_numbering(blocks: list[dict]) -> dict:

    for block in blocks:
        content = block.get("content", "")
-        # Find numbered paragraphs (e.g., "1.", "2.", "15.")
-        numbers = re.findall(r"^(\d+)\.", content, re.MULTILINE)
+        # Find numbered paragraphs: plain "1." or bold "**1.**"
+        numbers = re.findall(r"^(?:\*\*)?(\d+)\.(?:\*\*)?", content, re.MULTILINE)
        all_numbers.extend(int(n) for n in numbers)

    if all_numbers: