From bd4b0ca76640bc629402a941cacff6348d2d2e44 Mon Sep 17 00:00:00 2001 From: Chaim Date: Tue, 5 May 2026 19:18:57 +0000 Subject: [PATCH] =?UTF-8?q?feat(mcp):=20case=5Fget=5Ffinal=5Ftext=20?= =?UTF-8?q?=E2=80=94=20fall=20back=20to=20PDF/DOC/RTF/TXT/MD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Hermes Knowledge Curator's hermes-curator.md says it must be able to read both DOCX and PDF final decisions. The original implementation hardcoded the .docx extension only. Extend to try .docx → .pdf → .doc → .rtf → .txt → .md, returning the first match. extractor.extract_text already supports all six formats, so no extractor changes needed. If none found, the not_found response now includes the tried_extensions list so the caller knows what was attempted. Verified on case 1130-25 (.docx still picked first) and tested via `curator-cmp mcp test legal-ai`. --- mcp-server/src/legal_mcp/tools/cases.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/mcp-server/src/legal_mcp/tools/cases.py b/mcp-server/src/legal_mcp/tools/cases.py index 0219de5..1ebd8de 100644 --- a/mcp-server/src/legal_mcp/tools/cases.py +++ b/mcp-server/src/legal_mcp/tools/cases.py @@ -377,19 +377,30 @@ async def case_get_final_text(case_number: str, max_chars: int = 0) -> str: בניגוד ל-`document_get_text` שעובד על שורות בטבלת `documents`, הקובץ הסופי הוא רק קובץ בתיקייה (נוצר על ידי `api_mark_final`). + תומך בכל הפורמטים ש-extractor.extract_text מטפל בהם — מנסה + `.docx` תחילה, ואז `.pdf`, `.doc`, `.rtf`, `.txt`, `.md`. Args: case_number: מספר תיק הערר max_chars: אם >0, חתוך את הטקסט המוחזר לאורך הזה. 0 = הכל. """ case_dir = config.find_case_dir(case_number) - final_path = case_dir / "exports" / f"סופי-{case_number}.docx" + exports_dir = case_dir / "exports" + final_stem = f"סופי-{case_number}" - if not final_path.exists(): + final_path = None + for ext in (".docx", ".pdf", ".doc", ".rtf", ".txt", ".md"): + candidate = exports_dir / f"{final_stem}{ext}" + if candidate.exists(): + final_path = candidate + break + + if final_path is None: return json.dumps({ "status": "not_found", "case_number": case_number, - "expected_path": str(final_path), + "expected_path": str(exports_dir / f"{final_stem}.docx"), + "tried_extensions": [".docx", ".pdf", ".doc", ".rtf", ".txt", ".md"], "hint": ( "ההחלטה הסופית עדיין לא סומנה כ'סופית' ב-UI. " "דפנה צריכה ללחוץ 'סמן כסופי' על קובץ הטיוטה הנכון."