Files
legal-ai/mcp-server/src/legal_mcp/tools/cases.py
Chaim 7c9582ed04
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m36s
feat(mcp): case_get_final_text — let agents read the signed final DOCX
The Knowledge Curator (Hermes) couldn't read סופי-{case}.docx because
document_get_text only works on rows in the documents table — the final
file is just a copy in the case's exports/ directory, not a tracked
document. CMP-71 hit this and produced an unproductive interaction
asking the user how to fix the access issue.

Add a new MCP tool that:
- Locates exports/סופי-{case_number}.docx via config.find_case_dir
- Extracts text using the existing extractor service (python-docx based)
- Returns JSON with status + text + page_count + truncation info
- Optional max_chars cap for large decisions

Smoke test on case 1130-25: 400-char preview returns proper Hebrew text
beginning with "לפנינו ערר על החלטת הוועדה המקומית...".

The local MCP server reloads on next Hermes spawn (stdio mode), so the
tool is immediately available — no Coolify deploy needed.

Curator's promptTemplate (DB-stored) updated to use the new tool as the
primary path for reading the final.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 15:57:10 +00:00

425 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""MCP tools for case management."""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
from pathlib import Path
from uuid import UUID
import httpx
from legal_mcp import config
from legal_mcp.services import audit, db, extractor, git_sync, practice_area as pa
logger = logging.getLogger(__name__)
GITEA_ORG = "cases"
def _gitea_host() -> str:
return os.environ.get("GITEA_HOST", "https://gitea.nautilus.marcusgroup.org")
def _gitea_token() -> str:
return os.environ.get("GITEA_ACCESS_TOKEN") or os.environ.get("GITEA_TOKEN", "")
async def _setup_gitea_remote(case_number: str, title: str, case_dir: Path) -> dict:
"""Create Gitea repo and configure git remote.
Returns a dict with: ok (bool), url (str|None), error (str|None).
Never raises — failures are reported via the dict so callers can surface
them to the UI instead of silently swallowing them.
"""
token = _gitea_token()
if not token:
logger.info("No GITEA_TOKEN — skipping Gitea repo creation for %s", case_number)
return {"ok": False, "url": None, "error": "no_token"}
try:
async with httpx.AsyncClient(verify=False, timeout=30) as client:
resp = await client.post(
f"{_gitea_host()}/api/v1/orgs/{GITEA_ORG}/repos",
headers={"Authorization": f"token {token}"},
json={
"name": case_number,
"description": f"ערר {case_number}{title}"[:255],
"private": True,
"auto_init": False,
},
)
if resp.status_code == 409:
resp2 = await client.get(
f"{_gitea_host()}/api/v1/repos/{GITEA_ORG}/{case_number}",
headers={"Authorization": f"token {token}"},
)
resp2.raise_for_status()
repo = resp2.json()
else:
resp.raise_for_status()
repo = resp.json()
clone_url = repo.get("clone_url", "")
html_url = repo.get("html_url", "")
if not clone_url:
return {"ok": False, "url": None, "error": "no_clone_url"}
auth_url = clone_url.replace("https://", f"https://chaim:{token}@")
git_env = {
"GIT_AUTHOR_NAME": "Ezer Mishpati",
"GIT_AUTHOR_EMAIL": "legal@local",
"GIT_COMMITTER_NAME": "Ezer Mishpati",
"GIT_COMMITTER_EMAIL": "legal@local",
"PATH": os.environ.get("PATH", "/usr/bin:/bin"),
}
# Add or update remote
result = subprocess.run(
["git", "remote", "get-url", "origin"],
cwd=case_dir, capture_output=True, text=True,
)
if result.returncode == 0:
subprocess.run(
["git", "remote", "set-url", "origin", auth_url],
cwd=case_dir, capture_output=True, env=git_env,
)
else:
subprocess.run(
["git", "remote", "add", "origin", auth_url],
cwd=case_dir, capture_output=True, env=git_env,
)
# Push
push = subprocess.run(
["git", "push", "-u", "origin", "HEAD"],
cwd=case_dir, capture_output=True, text=True, env=git_env,
)
if push.returncode != 0:
stderr = push.stderr.strip()
logger.warning("Gitea push failed for %s: %s", case_number, stderr)
return {"ok": False, "url": html_url or None, "error": f"push_failed: {stderr[:200]}"}
logger.info("Gitea repo created and pushed for %s", case_number)
return {"ok": True, "url": html_url or None, "error": None}
except httpx.HTTPStatusError as exc:
msg = f"http_{exc.response.status_code}"
logger.warning("Gitea setup failed for %s: %s", case_number, msg)
return {"ok": False, "url": None, "error": msg}
except Exception as exc:
logger.warning("Gitea setup failed for %s: %s", case_number, exc)
return {"ok": False, "url": None, "error": f"{type(exc).__name__}: {exc}"[:200]}
async def case_create(
case_number: str,
title: str,
appellants: list[str] | None = None,
respondents: list[str] | None = None,
subject: str = "",
property_address: str = "",
permit_number: str = "",
committee_type: str = "ועדה מקומית",
hearing_date: str = "",
notes: str = "",
expected_outcome: str = "",
practice_area: str = "appeals_committee",
appeal_subtype: str = "",
) -> str:
"""יצירת תיק ערר חדש.
Args:
case_number: מספר תיק הערר (לדוגמה: 123-24)
title: כותרת קצרה של הערר
appellants: שמות העוררים
respondents: שמות המשיבים
subject: נושא הערר
property_address: כתובת הנכס
permit_number: מספר היתר
committee_type: סוג הוועדה (ברירת מחדל: ועדה מקומית)
hearing_date: תאריך דיון (YYYY-MM-DD)
notes: הערות
expected_outcome: תוצאה צפויה (rejection/partial_acceptance/full_acceptance/betterment_levy)
practice_area: תחום משפטי (appeals_committee / national_insurance / labor_law)
appeal_subtype: סוג ערר (building_permit / betterment_levy / compensation_197).
ריק = יוסק אוטומטית ממספר התיק
"""
from datetime import date as date_type
h_date = None
if hearing_date:
h_date = date_type.fromisoformat(hearing_date)
# Resolve appeal_subtype: explicit override > auto-derive > 'unknown'
derived_subtype = pa.derive_subtype(case_number, practice_area)
if not appeal_subtype:
appeal_subtype = derived_subtype
pa.validate(practice_area, appeal_subtype)
case = await db.create_case(
case_number=case_number,
title=title,
appellants=appellants,
respondents=respondents,
subject=subject,
property_address=property_address,
permit_number=permit_number,
committee_type=committee_type,
hearing_date=h_date,
notes=notes,
expected_outcome=expected_outcome,
practice_area=practice_area,
appeal_subtype=appeal_subtype,
)
# If the user overrode the case-number convention (e.g. case 8500 marked
# as building_permit), record it so we can audit later.
if pa.is_override(case_number, practice_area, appeal_subtype):
await audit.log_action(
action="case_subtype_override",
case_id=UUID(case["id"]),
details={
"case_number": case_number,
"derived_subtype": derived_subtype,
"chosen_subtype": appeal_subtype,
"practice_area": practice_area,
},
)
# Initialize git repo for the case
case_dir = config.find_case_dir(case_number)
case_dir.mkdir(parents=True, exist_ok=True)
docs_dir = case_dir / "documents"
docs_dir.mkdir(exist_ok=True)
(docs_dir / "originals").mkdir(exist_ok=True)
(docs_dir / "extracted").mkdir(exist_ok=True)
(docs_dir / "proofread").mkdir(exist_ok=True)
(docs_dir / "backup").mkdir(exist_ok=True)
(case_dir / "drafts").mkdir(exist_ok=True)
# Save case metadata
case_json = case_dir / "case.json"
case_json.write_text(json.dumps(case, default=str, ensure_ascii=False, indent=2))
# Create notes file
notes_file = case_dir / "notes.md"
notes_file.write_text(f"# הערות - תיק {case_number}\n\n{notes}\n")
# Initialize git repo (best-effort)
try:
subprocess.run(["git", "init"], cwd=case_dir, capture_output=True)
subprocess.run(["git", "add", "."], cwd=case_dir, capture_output=True)
subprocess.run(
["git", "commit", "-m", f"אתחול תיק {case_number}: {title}"],
cwd=case_dir,
capture_output=True,
env={"GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local",
"GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local",
"PATH": "/usr/bin:/bin"},
)
except Exception:
pass # git not available — non-critical
# Create Gitea repo and configure remote — surface result so callers can
# show failures (e.g. stale token) and offer a retry button instead of
# silently producing a case with no remote.
case["gitea"] = await _setup_gitea_remote(case_number, title, case_dir)
return json.dumps(case, default=str, ensure_ascii=False, indent=2)
async def case_list(status: str = "", limit: int = 50) -> str:
"""רשימת תיקי ערר עם אפשרות סינון לפי סטטוס.
Args:
status: סינון לפי סטטוס (new, in_progress, drafted, reviewed, final). ריק = הכל
limit: מספר תוצאות מקסימלי
"""
cases = await db.list_cases(status=status or None, limit=limit)
if not cases:
return "אין תיקים."
return json.dumps(cases, default=str, ensure_ascii=False, indent=2)
async def case_get(case_number: str) -> str:
"""קבלת פרטי תיק מלאים כולל רשימת מסמכים.
Args:
case_number: מספר תיק הערר
"""
case = await db.get_case_by_number(case_number)
if not case:
return f"תיק {case_number} לא נמצא."
docs = await db.list_documents(UUID(case["id"]))
case["documents"] = docs
return json.dumps(case, default=str, ensure_ascii=False, indent=2)
async def case_update(
case_number: str,
status: str = "",
title: str = "",
subject: str = "",
notes: str = "",
hearing_date: str = "",
decision_date: str = "",
tags: list[str] | None = None,
expected_outcome: str = "",
) -> str:
"""עדכון פרטי תיק.
Args:
case_number: מספר תיק הערר
status: סטטוס חדש (new, in_progress, drafted, reviewed, final)
title: כותרת חדשה
subject: נושא חדש
notes: הערות חדשות
hearing_date: תאריך דיון (YYYY-MM-DD)
decision_date: תאריך החלטה (YYYY-MM-DD)
tags: תגיות
expected_outcome: תוצאה צפויה (rejection/partial_acceptance/full_acceptance/betterment_levy)
"""
from datetime import date as date_type
# Ordered workflow statuses — regression protection
STATUS_ORDER = [
"new", "uploading", "processing", "documents_ready",
"analyst_verified", "research_complete", "outcome_set",
"brainstorming", "direction_approved", "analysis_enriched", "ready_for_writing",
"drafting", "qa_review", "drafted",
"exported", "reviewed", "final",
]
case = await db.get_case_by_number(case_number)
if not case:
return f"תיק {case_number} לא נמצא."
fields = {}
if status:
current = case.get("status", "")
cur_idx = STATUS_ORDER.index(current) if current in STATUS_ORDER else -1
new_idx = STATUS_ORDER.index(status) if status in STATUS_ORDER else -1
# Only update if advancing or status is unknown to the order
if new_idx >= cur_idx or new_idx == -1:
fields["status"] = status
if title:
fields["title"] = title
if subject:
fields["subject"] = subject
if notes:
fields["notes"] = notes
if hearing_date:
fields["hearing_date"] = date_type.fromisoformat(hearing_date)
if decision_date:
fields["decision_date"] = date_type.fromisoformat(decision_date)
if tags is not None:
fields["tags"] = tags
if expected_outcome:
fields["expected_outcome"] = expected_outcome
updated = await db.update_case(UUID(case["id"]), **fields)
# Git commit + push the update (best-effort)
try:
case_dir = config.find_case_dir(case_number)
if case_dir.exists():
case_json = case_dir / "case.json"
case_json.write_text(json.dumps(updated, default=str, ensure_ascii=False, indent=2))
git_sync.commit_and_push(case_dir, f"עדכון תיק: {', '.join(fields.keys())}")
except Exception:
pass # git not available — non-critical
return json.dumps(updated, default=str, ensure_ascii=False, indent=2)
async def case_delete(case_number: str, remove_files: bool = False) -> str:
"""מחיקת תיק ערר. מסיר את התיק מ-DB עם cascade לכל המסמכים והטענות.
Args:
case_number: מספר תיק הערר
remove_files: האם למחוק גם את תיקיית הדיסק (drafts, git repo).
ברירת מחדל False — ה-DB נמחק אבל הקבצים נשמרים לגיבוי.
"""
case = await db.get_case_by_number(case_number)
if not case:
return json.dumps(
{"deleted": False, "reason": f"תיק {case_number} לא נמצא."},
ensure_ascii=False,
)
case_id = UUID(case["id"])
ok = await db.delete_case(case_id)
result = {
"deleted": ok,
"case_number": case_number,
"case_id": str(case_id),
"removed_files": False,
}
if ok and remove_files:
case_dir = config.find_case_dir(case_number)
if case_dir.exists():
shutil.rmtree(case_dir, ignore_errors=True)
result["removed_files"] = True
return json.dumps(result, ensure_ascii=False, indent=2)
async def case_get_final_text(case_number: str, max_chars: int = 0) -> str:
"""קליטת טקסט ההחלטה הסופית (`סופי-{case}.docx` בתיקיית exports).
בניגוד ל-`document_get_text` שעובד על שורות בטבלת `documents`,
הקובץ הסופי הוא רק קובץ בתיקייה (נוצר על ידי `api_mark_final`).
Args:
case_number: מספר תיק הערר
max_chars: אם >0, חתוך את הטקסט המוחזר לאורך הזה. 0 = הכל.
"""
case_dir = config.find_case_dir(case_number)
final_path = case_dir / "exports" / f"סופי-{case_number}.docx"
if not final_path.exists():
return json.dumps({
"status": "not_found",
"case_number": case_number,
"expected_path": str(final_path),
"hint": (
"ההחלטה הסופית עדיין לא סומנה כ'סופית' ב-UI. "
"דפנה צריכה ללחוץ 'סמן כסופי' על קובץ הטיוטה הנכון."
),
}, ensure_ascii=False, indent=2)
try:
text, page_count, _ = await extractor.extract_text(str(final_path))
except Exception as e:
logger.exception("case_get_final_text: extraction failed for %s", case_number)
return json.dumps({
"status": "error",
"case_number": case_number,
"file_path": str(final_path),
"error": str(e),
}, ensure_ascii=False, indent=2)
text = text or ""
truncated = False
if max_chars > 0 and len(text) > max_chars:
text = text[:max_chars]
truncated = True
return json.dumps({
"status": "ok",
"case_number": case_number,
"file_path": str(final_path),
"text_length": len(text),
"page_count": page_count,
"truncated": truncated,
"text": text,
}, ensure_ascii=False, indent=2)