Add renumber_all_blocks + fix sequential_numbering check for bold format
block_writer: new renumber_all_blocks() function that renumbers all paragraphs across all blocks sequentially (1, 2, 3...). Handles both plain "N." and bold "**N.**" formats. Added missing 'import re'. qa_validator: sequential_numbering check now matches bold-formatted numbers (**N.**) in addition to plain (N.). Tested on Hecht: renumbered 115 paragraphs across 7 blocks, QA 6/6. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import date
|
||||
from uuid import UUID
|
||||
|
||||
@@ -580,6 +581,60 @@ async def _build_previous_blocks_context(case_id: UUID, decision: dict | None) -
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
||||
# ── Renumbering ───────────────────────────────────────────────────
|
||||
|
||||
async def renumber_all_blocks(decision_id: UUID) -> dict:
|
||||
"""מספור רציף מחדש של כל הבלוקים בהחלטה.
|
||||
|
||||
עובר על כל הבלוקים לפי סדר, ומחליף את כל המספורים
|
||||
(1. 2. 3. או **1.** **2.**) לרצף אחד רציף.
|
||||
"""
|
||||
pool = await db.get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""SELECT block_id, block_index, content, word_count
|
||||
FROM decision_blocks WHERE decision_id = $1
|
||||
ORDER BY block_index""",
|
||||
decision_id,
|
||||
)
|
||||
|
||||
current_num = 1
|
||||
updated = 0
|
||||
# Blocks that shouldn't be numbered
|
||||
skip_blocks = {"block-alef", "block-bet", "block-gimel", "block-dalet", "block-yod-bet"}
|
||||
|
||||
for row in rows:
|
||||
if row["block_id"] in skip_blocks or not row["content"]:
|
||||
continue
|
||||
|
||||
content = row["content"]
|
||||
# Replace numbered paragraphs: "N." or "**N.**" or "**N.**" at line start
|
||||
def replace_num(match):
|
||||
nonlocal current_num
|
||||
prefix = match.group(1) or "" # bold markers
|
||||
suffix = match.group(3) or "" # bold markers
|
||||
result = f"{prefix}{current_num}{suffix}"
|
||||
current_num += 1
|
||||
return result
|
||||
|
||||
new_content = re.sub(
|
||||
r'^(\*\*)?(\d+)(\.?\*?\*?\.)',
|
||||
replace_num,
|
||||
content,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
|
||||
if new_content != content:
|
||||
async with pool.acquire() as conn:
|
||||
await conn.execute(
|
||||
"UPDATE decision_blocks SET content = $1, updated_at = now() WHERE decision_id = $2 AND block_id = $3",
|
||||
new_content, decision_id, row["block_id"],
|
||||
)
|
||||
updated += 1
|
||||
|
||||
return {"total_paragraphs": current_num - 1, "blocks_updated": updated}
|
||||
|
||||
|
||||
# ── Store block ───────────────────────────────────────────────────
|
||||
|
||||
async def store_block(decision_id: UUID, block_result: dict) -> None:
|
||||
|
||||
@@ -279,8 +279,8 @@ def check_sequential_numbering(blocks: list[dict]) -> dict:
|
||||
|
||||
for block in blocks:
|
||||
content = block.get("content", "")
|
||||
# Find numbered paragraphs (e.g., "1.", "2.", "15.")
|
||||
numbers = re.findall(r"^(\d+)\.", content, re.MULTILINE)
|
||||
# Find numbered paragraphs: "1." or "**1.**" or "**1.**"
|
||||
numbers = re.findall(r"^(?:\*\*)?(\d+)\.(?:\*\*)?", content, re.MULTILINE)
|
||||
all_numbers.extend(int(n) for n in numbers)
|
||||
|
||||
if all_numbers:
|
||||
|
||||
Reference in New Issue
Block a user