Files
legal-ai/scripts/bidi_table.py
Chaim 3f759d3610 Improve document processing pipeline and agent workflows
- Add delete_document_chunks for reprocessing, save extracted text to disk
- Expand case directory structure (original/extracted/proofread/backup)
- Update classifier patterns (תגובה, הודעת עמדה)
- Fix proofreader agent paths for new directory layout
- Update HEARTBEAT to notify on every task completion
- Improve bidi_table with LRE/PDF directional embedding
- Add Paperclip project verification and auto-close setup issue
- Add auto-sync-cases.sh for Gitea synchronization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 16:45:49 +00:00

76 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""BiDi-safe box-drawing table renderer for mixed Hebrew/English terminal output.

Uses Unicode directional marks to prevent the BiDi algorithm from breaking
table alignment when Hebrew text is present.

Usage as module:
    from scripts.bidi_table import bidi_table
    print(bidi_table(['Col1', 'Col2'], [['val1', 'ערך2']]))

Usage from CLI:
    python3 scripts/bidi_table.py
"""
from __future__ import annotations

import re
import unicodedata
# Invisible Unicode bidirectional control characters, spelled by their
# official character names so each literal documents itself.
LRM = "\N{LEFT-TO-RIGHT MARK}"  # U+200E
RLM = "\N{RIGHT-TO-LEFT MARK}"  # U+200F
LRE = "\N{LEFT-TO-RIGHT EMBEDDING}"  # U+202A, opens an embedding
PDF = "\N{POP DIRECTIONAL FORMATTING}"  # U+202C, closes the last embedding
# Matches one Hebrew code point: the Hebrew block (U+0590-U+05FF) plus the
# precomposed Hebrew letters and ligatures that live in the Alphabetic
# Presentation Forms block (U+FB1D-U+FB4F).
_HEB_RE = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')


def _has_hebrew(text: str) -> bool:
    """Return True if *text* contains at least one Hebrew code point.

    Both the main Hebrew block and the Hebrew presentation forms count, so
    text that uses only precomposed letters is still detected.

    Args:
        text: The string to inspect; may be empty.

    Returns:
        True when any character of *text* is Hebrew, otherwise False.
    """
    return _HEB_RE.search(text) is not None
# Light box-drawing glyphs, written as escapes so they survive tools that
# strip or mangle non-ASCII drawing characters.
_BOX_HORIZONTAL = "\u2500"  # horizontal rule
_BOX_VERTICAL = "\u2502"  # vertical rule
_BOX_TOP = ("\u250c", "\u252c", "\u2510")  # left corner, tee, right corner
_BOX_MIDDLE = ("\u251c", "\u253c", "\u2524")  # left tee, cross, right tee
_BOX_BOTTOM = ("\u2514", "\u2534", "\u2518")  # left corner, tee, right corner

# General categories that occupy no terminal column: non-spacing marks (for
# example Hebrew niqqud), enclosing marks, and format characters such as the
# bidi controls.
_ZERO_WIDTH_CATEGORIES = frozenset({"Mn", "Me", "Cf"})


def _display_width(text: str) -> int:
    """Return the number of terminal columns *text* is expected to occupy."""
    width = 0
    for ch in text:
        # SOFT HYPHEN is category Cf but is conventionally counted as one
        # column (as in the classic wcwidth implementation).
        if ch != "\u00ad" and unicodedata.category(ch) in _ZERO_WIDTH_CATEGORIES:
            continue
        # Wide and fullwidth East Asian characters take two columns.
        width += 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
    return width


def bidi_table(headers: list[str], rows: list[list[str]]) -> str:
    """Render a box-drawing table safe for mixed RTL/LTR terminal display.

    Column widths are measured in display columns rather than code points, so
    combining marks and directional controls inside a cell do not skew the
    alignment.

    Args:
        headers: Column titles; their count fixes the number of columns.
        rows: Table body. Cells beyond ``len(headers)`` are ignored and
            missing cells are rendered blank.

    Returns:
        The table as one string: top border, header row, separator, one line
        per row, and bottom border, joined with newlines (no trailing newline).
    """
    ncols = len(headers)

    # Widest cell of each column, header included.
    col_widths = [_display_width(h) for h in headers]
    for row in rows:
        for i, cell in enumerate(row[:ncols]):
            col_widths[i] = max(col_widths[i], _display_width(cell))

    def hline(left: str, mid: str, right: str) -> str:
        # Each column is as wide as its content plus one space of padding
        # on either side.
        return left + mid.join(_BOX_HORIZONTAL * (w + 2) for w in col_widths) + right

    def dataline(cells: list[str]) -> str:
        parts = []
        for i, width in enumerate(col_widths):
            cell = cells[i] if i < len(cells) else ""
            padded = cell + " " * max(0, width - _display_width(cell))
            # Wrap each cell: LRE forces left-to-right context for the cell,
            # so box-drawing chars stay in place. PDF closes the embedding.
            parts.append(LRE + " " + padded + " " + PDF)
        # The leading LRM is a strong left-to-right character, so the line's
        # base direction resolves to LTR even when the first cell is Hebrew.
        return LRM + _BOX_VERTICAL + _BOX_VERTICAL.join(parts) + _BOX_VERTICAL

    lines = [hline(*_BOX_TOP), dataline(headers), hline(*_BOX_MIDDLE)]
    lines.extend(dataline(row) for row in rows)
    lines.append(hline(*_BOX_BOTTOM))
    return "\n".join(lines)
if __name__ == "__main__":
    # Demo run: print a sample table whose rows mix English file and model
    # names with Hebrew text.
    demo_headers = ["File", "Description", "Model", "Step"]
    demo_rows = [
        ["claims_extractor.py", "חילוץ טענות מכתבי טענות", "Sonnet", "שלב 3 — הבא בתור"],
        ["brainstorm.py", "סיעור מוחות — כיווני נימוק", "Sonnet", "שלב 4"],
        ["block_writer.py", "כתיבת בלוקים של החלטה", "Sonnet/Opus", "שלב 5"],
        ["qa_validator.py", "בדיקת איכות QA", "Sonnet", "שלב 6"],
        ["style_analyzer.py", "ניתוח סגנון דפנה", "Opus", "חד-פעמי"],
        ["learning_loop.py", "למידה מהחלטה סופית", "Sonnet", "סוף תהליך"],
    ]
    print(bidi_table(demo_headers, demo_rows))