Improve document processing pipeline and agent workflows
- Add delete_document_chunks for reprocessing, save extracted text to disk
- Expand case directory structure (original/extracted/proofread/backup)
- Update classifier patterns (תגובה, הודעת עמדה)
- Fix proofreader agent paths for new directory layout
- Update HEARTBEAT to notify on every task completion
- Improve bidi_table with LRE/PDF directional embedding
- Add Paperclip project verification and auto-close setup issue
- Add auto-sync-cases.sh for Gitea synchronization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
37
scripts/auto-sync-cases.sh
Executable file
37
scripts/auto-sync-cases.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Auto-sync case repos to Gitea.
# Runs via crontab every minute; for every case repository found under the
# new/, in-progress/ and completed/ status directories it commits any pending
# changes and pushes them to origin/main.

CASES_DIR="/home/chaim/legal-ai/data/cases"
LOG="/home/chaim/legal-ai/data/.auto-sync.log"

# Environment applied to the git commands below. This is an array (not a flat
# string) so that values containing spaces, such as "Ezer Mishpati", reach
# `env` as single NAME=VALUE arguments. With an unquoted string expansion the
# value is word-split and `env` tries to execute "Mishpati" as the command,
# so the commit never runs.
GIT_ENV=(
    "GIT_AUTHOR_NAME=Ezer Mishpati"
    "GIT_AUTHOR_EMAIL=legal@local"
    "GIT_COMMITTER_NAME=Ezer Mishpati"
    "GIT_COMMITTER_EMAIL=legal@local"
    "GIT_TERMINAL_PROMPT=0"
)

for status_dir in "$CASES_DIR"/new "$CASES_DIR"/in-progress "$CASES_DIR"/completed; do
    [ -d "$status_dir" ] || continue
    for case_dir in "$status_dir"/*/; do
        # Only directories that are git repositories are synced
        [ -d "$case_dir/.git" ] || continue

        cd "$case_dir" || continue

        # Check for any changes (modified, new, deleted)
        changes=$(git status --porcelain 2>/dev/null)
        [ -z "$changes" ] && continue

        # Stage all changes
        git add -A 2>/dev/null

        # Number of staged files, used in the commit message and the log line
        count=$(git diff --cached --name-only 2>/dev/null | wc -l)
        case_name=$(basename "$case_dir")
        msg="סנכרון אוטומטי — ${count} קבצים שונו"

        # Commit; push and log only when a commit was actually created
        if env "${GIT_ENV[@]}" git commit -m "$msg" --quiet 2>/dev/null; then
            # Push runs synchronously and its errors are ignored (best effort).
            # GIT_TERMINAL_PROMPT=0 from GIT_ENV keeps git from waiting on a
            # credential prompt when run from cron.
            env "${GIT_ENV[@]}" git push origin main --quiet 2>/dev/null
            echo "$(date '+%Y-%m-%d %H:%M:%S') | $case_name | $count files synced" >> "$LOG"
        fi
    done
done
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
"""BiDi-safe box-drawing table renderer for mixed Hebrew/English terminal output.
|
||||
|
||||
Uses LRM (Left-to-Right Mark, U+200E) before box-drawing characters to prevent
|
||||
the BiDi algorithm from breaking table alignment when Hebrew text is present.
|
||||
Uses Unicode directional marks to prevent the BiDi algorithm from breaking
|
||||
table alignment when Hebrew text is present.
|
||||
|
||||
Usage as module:
|
||||
from scripts.bidi_table import bidi_table
|
||||
@@ -14,14 +14,25 @@ Usage from CLI:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
LRM = "\u200E" # Left-to-Right Mark — invisible, prevents BiDi reordering
|
||||
import re
|
||||
|
||||
LRM = "\u200E" # Left-to-Right Mark
|
||||
RLM = "\u200F" # Right-to-Left Mark
|
||||
LRE = "\u202A" # Left-to-Right Embedding
|
||||
PDF = "\u202C" # Pop Directional Formatting
|
||||
|
||||
_HEB_RE = re.compile(r'[\u0590-\u05FF]')
|
||||
|
||||
|
||||
def _has_hebrew(text: str) -> bool:
    """Return True when *text* contains at least one match of ``_HEB_RE``."""
    return _HEB_RE.search(text) is not None
|
||||
|
||||
|
||||
def bidi_table(headers: list[str], rows: list[list[str]]) -> str:
|
||||
"""Render a box-drawing table safe for mixed RTL/LTR terminal display."""
|
||||
ncols = len(headers)
|
||||
|
||||
# Calculate column widths
|
||||
# Calculate column widths (visual length, not counting bidi marks)
|
||||
col_widths = [len(h) for h in headers]
|
||||
for row in rows:
|
||||
for i, cell in enumerate(row[:ncols]):
|
||||
@@ -35,8 +46,10 @@ def bidi_table(headers: list[str], rows: list[list[str]]) -> str:
|
||||
for i in range(ncols):
|
||||
cell = cells[i] if i < len(cells) else ""
|
||||
padded = cell + " " * max(0, col_widths[i] - len(cell))
|
||||
parts.append(" " + padded + " ")
|
||||
return LRM + "│" + (LRM + "│").join(parts) + LRM + "│"
|
||||
# Wrap each cell: LRE forces left-to-right context for the cell,
|
||||
# so box-drawing chars stay in place. PDF closes the embedding.
|
||||
parts.append(LRE + " " + padded + " " + PDF)
|
||||
return LRM + "│" + ("│").join(parts) + "│"
|
||||
|
||||
lines = [hline("┌", "┬", "┐")]
|
||||
lines.append(dataline(headers))
|
||||
|
||||
Reference in New Issue
Block a user