From 7c9582ed04e617347e7ccb0d71146d4c104eab44 Mon Sep 17 00:00:00 2001
From: Chaim
Date: Tue, 5 May 2026 15:57:10 +0000
Subject: [PATCH] =?UTF-8?q?feat(mcp):=20case=5Fget=5Ffinal=5Ftext=20?=
 =?UTF-8?q?=E2=80=94=20let=20agents=20read=20the=20signed=20final=20DOCX?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Knowledge Curator (Hermes) couldn't read סופי-{case}.docx because
document_get_text only works on rows in the documents table — the final
file is just a copy in the case's exports/ directory, not a tracked
document. CMP-71 hit this and produced an unproductive interaction
asking the user how to fix the access issue.

Add a new MCP tool that:
- Locates exports/סופי-{case_number}.docx via config.find_case_dir
- Extracts text using the existing extractor service (python-docx based)
- Returns JSON with status + text + page_count + truncation info
- Accepts an optional max_chars cap for large decisions

Smoke test on case 1130-25: 400-char preview returns proper Hebrew text
beginning with "לפנינו ערר על החלטת הוועדה המקומית...".

The local MCP server reloads on next Hermes spawn (stdio mode), so the
tool becomes available as soon as Hermes is respawned — no Coolify
deploy needed.

Curator's promptTemplate (DB-stored) updated to use the new tool as the
primary path for reading the final decision.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Review notes (placed after the "---" marker, so they are not part of the
commit message):
* case_get_final_text truncates the text before measuring it, so
  "text_length" is the length of the returned text (at most max_chars when
  "truncated" is true), not the length of the full document.
* Truncation only happens when max_chars > 0; zero or a negative value
  returns the whole text.
* A missing file produces a "not_found" payload that includes the expected
  path; any exception raised by extractor.extract_text is logged via
  logger.exception and returned as an "error" payload instead of propagating.
* The third element of the tuple returned by extractor.extract_text is
  discarded.
* NOTE(review): config.find_case_dir is called outside the try block; its
  behaviour for an unknown case number is not visible in this patch —
  confirm it cannot return None or raise.
* NOTE(review): line breaks and indentation were restored from a
  whitespace-collapsed copy of this patch; context-line indentation is
  inferred — verify with `git apply --check` before applying.

 mcp-server/src/legal_mcp/server.py      |  7 ++++
 mcp-server/src/legal_mcp/tools/cases.py | 54 ++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py
index 7f0098d..20f7ff2 100644
--- a/mcp-server/src/legal_mcp/server.py
+++ b/mcp-server/src/legal_mcp/server.py
@@ -116,6 +116,13 @@ async def case_delete(case_number: str, remove_files: bool = False) -> str:
     return await cases.case_delete(case_number, remove_files)
 
 
+@mcp.tool()
+async def case_get_final_text(case_number: str, max_chars: int = 0) -> str:
+    """קליטת טקסט ההחלטה הסופית (`סופי-{case}.docx` בתיקיית exports).
+    max_chars: 0=הכל, אחרת חיתוך לאורך הנתון. שימושי ל-Hermes Knowledge Curator."""
+    return await cases.case_get_final_text(case_number, max_chars)
+
+
 # Precedent attachments (user-supplied legal support for the compose phase)
 @mcp.tool()
 async def precedent_attach(
diff --git a/mcp-server/src/legal_mcp/tools/cases.py b/mcp-server/src/legal_mcp/tools/cases.py
index 7c07717..0219de5 100644
--- a/mcp-server/src/legal_mcp/tools/cases.py
+++ b/mcp-server/src/legal_mcp/tools/cases.py
@@ -13,7 +13,7 @@ from uuid import UUID
 import httpx
 
 from legal_mcp import config
-from legal_mcp.services import audit, db, git_sync, practice_area as pa
+from legal_mcp.services import audit, db, extractor, git_sync, practice_area as pa
 
 logger = logging.getLogger(__name__)
 
@@ -370,3 +370,55 @@ async def case_delete(case_number: str, remove_files: bool = False) -> str:
         result["removed_files"] = True
 
     return json.dumps(result, ensure_ascii=False, indent=2)
+
+
+async def case_get_final_text(case_number: str, max_chars: int = 0) -> str:
+    """קליטת טקסט ההחלטה הסופית (`סופי-{case}.docx` בתיקיית exports).
+
+    בניגוד ל-`document_get_text` שעובד על שורות בטבלת `documents`,
+    הקובץ הסופי הוא רק קובץ בתיקייה (נוצר על ידי `api_mark_final`).
+
+    Args:
+        case_number: מספר תיק הערר
+        max_chars: אם >0, חתוך את הטקסט המוחזר לאורך הזה. 0 = הכל.
+    """
+    case_dir = config.find_case_dir(case_number)
+    final_path = case_dir / "exports" / f"סופי-{case_number}.docx"
+
+    if not final_path.exists():
+        return json.dumps({
+            "status": "not_found",
+            "case_number": case_number,
+            "expected_path": str(final_path),
+            "hint": (
+                "ההחלטה הסופית עדיין לא סומנה כ'סופית' ב-UI. "
+                "דפנה צריכה ללחוץ 'סמן כסופי' על קובץ הטיוטה הנכון."
+            ),
+        }, ensure_ascii=False, indent=2)
+
+    try:
+        text, page_count, _ = await extractor.extract_text(str(final_path))
+    except Exception as e:
+        logger.exception("case_get_final_text: extraction failed for %s", case_number)
+        return json.dumps({
+            "status": "error",
+            "case_number": case_number,
+            "file_path": str(final_path),
+            "error": str(e),
+        }, ensure_ascii=False, indent=2)
+
+    text = text or ""
+    truncated = False
+    if max_chars > 0 and len(text) > max_chars:
+        text = text[:max_chars]
+        truncated = True
+
+    return json.dumps({
+        "status": "ok",
+        "case_number": case_number,
+        "file_path": str(final_path),
+        "text_length": len(text),
+        "page_count": page_count,
+        "truncated": truncated,
+        "text": text,
+    }, ensure_ascii=False, indent=2)