Improve document processing pipeline and agent workflows
- Add delete_document_chunks for reprocessing, save extracted text to disk
- Expand case directory structure (original/extracted/proofread/backup)
- Update classifier patterns (תגובה, הודעת עמדה)
- Fix proofreader agent paths for new directory layout
- Update HEARTBEAT to notify on every task completion
- Improve bidi_table with LRE/PDF directional embedding
- Add Paperclip project verification and auto-close setup issue
- Add auto-sync-cases.sh for Gitea synchronization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
"""BiDi-safe box-drawing table renderer for mixed Hebrew/English terminal output.
|
||||
|
||||
Uses LRM (Left-to-Right Mark, U+200E) before box-drawing characters to prevent
|
||||
the BiDi algorithm from breaking table alignment when Hebrew text is present.
|
||||
Uses Unicode directional marks to prevent the BiDi algorithm from breaking
|
||||
table alignment when Hebrew text is present.
|
||||
|
||||
Usage as module:
|
||||
from scripts.bidi_table import bidi_table
|
||||
@@ -14,14 +14,25 @@ Usage from CLI:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
LRM = "\u200E" # Left-to-Right Mark — invisible, prevents BiDi reordering
|
||||
import re
|
||||
|
||||
LRM = "\u200E" # Left-to-Right Mark
|
||||
RLM = "\u200F" # Right-to-Left Mark
|
||||
LRE = "\u202A" # Left-to-Right Embedding
|
||||
PDF = "\u202C" # Pop Directional Formatting
|
||||
|
||||
_HEB_RE = re.compile(r'[\u0590-\u05FF]')
|
||||
|
||||
|
||||
def _has_hebrew(text: str) -> bool:
    """Return True when ``_HEB_RE`` finds at least one match in *text*."""
    # ``search`` yields a Match object on success and None otherwise.
    return _HEB_RE.search(text) is not None
|
||||
|
||||
|
||||
def bidi_table(headers: list[str], rows: list[list[str]]) -> str:
|
||||
"""Render a box-drawing table safe for mixed RTL/LTR terminal display."""
|
||||
ncols = len(headers)
|
||||
|
||||
# Calculate column widths
|
||||
# Calculate column widths (visual length, not counting bidi marks)
|
||||
col_widths = [len(h) for h in headers]
|
||||
for row in rows:
|
||||
for i, cell in enumerate(row[:ncols]):
|
||||
@@ -35,8 +46,10 @@ def bidi_table(headers: list[str], rows: list[list[str]]) -> str:
|
||||
for i in range(ncols):
|
||||
cell = cells[i] if i < len(cells) else ""
|
||||
padded = cell + " " * max(0, col_widths[i] - len(cell))
|
||||
parts.append(" " + padded + " ")
|
||||
return LRM + "│" + (LRM + "│").join(parts) + LRM + "│"
|
||||
# Wrap each cell: LRE forces left-to-right context for the cell,
|
||||
# so box-drawing chars stay in place. PDF closes the embedding.
|
||||
parts.append(LRE + " " + padded + " " + PDF)
|
||||
return LRM + "│" + ("│").join(parts) + "│"
|
||||
|
||||
lines = [hline("┌", "┬", "┐")]
|
||||
lines.append(dataline(headers))
|
||||
|
||||
Reference in New Issue
Block a user