Improve document processing pipeline and agent workflows

- Add delete_document_chunks for reprocessing, save extracted text to disk
- Expand case directory structure (original/extracted/proofread/backup)
- Update classifier patterns (תגובה, הודעת עמדה)
- Fix proofreader agent paths for new directory layout
- Update HEARTBEAT to notify on every task completion
- Improve bidi_table with LRE/PDF directional embedding
- Add Paperclip project verification and auto-close the setup issue once verified
- Add auto-sync-cases.sh for Gitea synchronization

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-09 16:45:49 +00:00
parent 63c9ca184b
commit 3f759d3610
10 changed files with 164 additions and 19 deletions

View File

@@ -84,6 +84,9 @@ async def create_project(
# Link issue to legal-ai case via plugin state
await _link_case_to_issue(conn, issue_id, case_number)
# Verify project creation and close the setup issue
await _verify_and_close_setup_issue(conn, project_id, issue_id, identifier, case_number)
return {
"id": project_id,
"company_id": company_id,
@@ -140,6 +143,70 @@ async def _link_case_to_issue(conn: asyncpg.Connection, issue_id: str, case_numb
logger.info("Linked issue %s to case %s via plugin state", issue_id, case_number)
async def _verify_and_close_setup_issue(
    conn: asyncpg.Connection,
    project_id: str,
    issue_id: str,
    identifier: str,
    case_number: str,
) -> None:
    """Verify the project was created correctly, then transition the setup issue to done.

    The issue is first moved to ``in_progress``. Three checks are then run
    against the database:

    1. a ``projects`` row with ``project_id`` exists;
    2. the ``issues`` row ``issue_id`` is attached to that project;
    3. a ``plugin_state`` row with ``state_key = 'legal-case-number'`` exists
       for the issue.

    If every check passes the issue is set to ``done``; otherwise it is left
    ``in_progress``. In both cases a comment containing a per-check report is
    inserted into ``issue_comments``.

    Args:
        conn: Open database connection used for all statements.
        project_id: ID of the project to verify (cast to ``uuid`` in SQL).
        issue_id: ID of the setup issue to transition and comment on.
        identifier: Issue identifier, used only as a prefix in log messages.
        case_number: Case number, used only in the comment heading.
    """
    # Move to in_progress while verifying
    await conn.execute(
        "UPDATE issues SET status = 'in_progress', started_at = now() WHERE id = $1",
        issue_id,
    )
    logger.info("%s: בביצוע — מאמת יצירת פרויקט", identifier)

    # Verify: project exists, issue is linked, plugin state exists
    project = await conn.fetchrow(
        "SELECT id, name FROM projects WHERE id = $1::uuid",
        project_id,
    )
    issue = await conn.fetchrow(
        "SELECT id, project_id FROM issues WHERE id = $1 AND project_id = $2::uuid",
        issue_id, project_id,
    )
    plugin_link = await conn.fetchrow(
        "SELECT value_json FROM plugin_state WHERE scope_id = $1 AND state_key = 'legal-case-number'",
        issue_id,
    )
    checks = [
        ("פרויקט נוצר", project is not None),
        ("משימה משויכת לפרויקט", issue is not None),
        ("קישור למערכת המשפטית", plugin_link is not None),
    ]

    failed = [name for name, ok in checks if not ok]
    failed_names = ", ".join(failed)
    # Each report line needs a distinct pass/fail marker; without one the
    # comment cannot show which individual check failed.
    report = "\n".join(f"{'✅' if ok else '❌'} {name}" for name, ok in checks)
    heading = f"## אימות יצירת פרויקט — ערר {case_number}\n\n{report}\n\n"

    if not failed:
        await conn.execute(
            "UPDATE issues SET status = 'done', completed_at = now() WHERE id = $1",
            issue_id,
        )
        comment_body = heading + "הפרויקט נוצר בהצלחה. משימה נסגרה אוטומטית."
    else:
        # Leave in_progress with a warning comment
        comment_body = heading + f"⚠️ בדיקות שנכשלו: {failed_names}"

    # Document the verification in a comment (company_id is copied from the issue row)
    await conn.execute(
        """INSERT INTO issue_comments (id, company_id, issue_id, body)
           VALUES ($1, (SELECT company_id FROM issues WHERE id = $2), $2,
                   $3)""",
        str(uuid.uuid4()), issue_id,
        comment_body,
    )

    if failed:
        logger.warning("%s: אימות נכשל — %s", identifier, failed_names)
    else:
        logger.info("%s: הושלם — פרויקט אומת ונסגר", identifier)
async def get_project_url(case_number: str) -> str | None:
"""Find existing Paperclip project for a case number."""
conn = await asyncpg.connect(PAPERCLIP_DB_URL)