All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 3m58s
The Hermes Knowledge Curator's hermes-curator.md says it must be able to read both DOCX and PDF final decisions. The original implementation hardcoded the .docx extension only. Extend to try .docx → .pdf → .doc → .rtf → .txt → .md, returning the first match. extractor.extract_text already supports all six formats, so no extractor changes needed. If none found, the not_found response now includes the tried_extensions list so the caller knows what was attempted. Verified on case 1130-25 (.docx still picked first) and tested via `curator-cmp mcp test legal-ai`.
436 lines
16 KiB
Python
436 lines
16 KiB
Python
"""MCP tools for case management."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import logging
|
||
import os
|
||
import shutil
|
||
import subprocess
|
||
from pathlib import Path
|
||
from uuid import UUID
|
||
|
||
import httpx
|
||
|
||
from legal_mcp import config
|
||
from legal_mcp.services import audit, db, extractor, git_sync, practice_area as pa
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
GITEA_ORG = "cases"
|
||
|
||
|
||
def _gitea_host() -> str:
|
||
return os.environ.get("GITEA_HOST", "https://gitea.nautilus.marcusgroup.org")
|
||
|
||
|
||
def _gitea_token() -> str:
|
||
return os.environ.get("GITEA_ACCESS_TOKEN") or os.environ.get("GITEA_TOKEN", "")
|
||
|
||
|
||
async def _setup_gitea_remote(case_number: str, title: str, case_dir: Path) -> dict:
    """Create (or reuse) the case's Gitea repository and push the local repo to it.

    Steps:
      1. POST to ``/api/v1/orgs/{GITEA_ORG}/repos`` to create a private repo
         named after the case. On HTTP 409 (presumably "already exists") the
         existing repo is fetched instead.
      2. Point the ``origin`` remote of ``case_dir`` at the repo's clone URL,
         with the access token embedded as HTTP credentials.
      3. ``git push -u origin HEAD``.

    Args:
        case_number: Case number; used verbatim as the repository name.
        title: Case title; used in the repository description (max 255 chars).
        case_dir: Local git working directory of the case.

    Returns:
        A dict with ``ok`` (bool), ``url`` (str | None — the repo's HTML URL)
        and ``error`` (str | None — a short machine-readable reason).

    Never raises — failures are reported via the dict so callers can surface
    them to the UI instead of silently swallowing them.
    """
    token = _gitea_token()
    if not token:
        logger.info("No GITEA_TOKEN — skipping Gitea repo creation for %s", case_number)
        return {"ok": False, "url": None, "error": "no_token"}

    def _redact(text: str) -> str:
        # The token is embedded in the remote URL, which git may echo back in
        # its error output; never let it reach logs or the API response.
        return text.replace(token, "***")

    try:
        auth_headers = {"Authorization": f"token {token}"}
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as-is because the deployment may rely on it (e.g. a self-signed
        # internal certificate) — confirm, and prefer a CA bundle if possible.
        async with httpx.AsyncClient(verify=False, timeout=30) as client:
            resp = await client.post(
                f"{_gitea_host()}/api/v1/orgs/{GITEA_ORG}/repos",
                headers=auth_headers,
                json={
                    "name": case_number,
                    "description": f"ערר {case_number} — {title}"[:255],
                    "private": True,
                    "auto_init": False,
                },
            )
            if resp.status_code == 409:
                # Repo could not be created due to a conflict: reuse the existing one.
                existing = await client.get(
                    f"{_gitea_host()}/api/v1/repos/{GITEA_ORG}/{case_number}",
                    headers=auth_headers,
                )
                existing.raise_for_status()
                repo = existing.json()
            else:
                resp.raise_for_status()
                repo = resp.json()

        clone_url = repo.get("clone_url", "")
        html_url = repo.get("html_url", "")
        if not clone_url:
            return {"ok": False, "url": None, "error": "no_clone_url"}

        # Embed credentials so the non-interactive push can authenticate.
        auth_url = clone_url.replace("https://", f"https://chaim:{token}@", 1)

        git_env = {
            "GIT_AUTHOR_NAME": "Ezer Mishpati",
            "GIT_AUTHOR_EMAIL": "legal@local",
            "GIT_COMMITTER_NAME": "Ezer Mishpati",
            "GIT_COMMITTER_EMAIL": "legal@local",
            "PATH": os.environ.get("PATH", "/usr/bin:/bin"),
        }

        # Add the remote, or update it when "origin" is already configured.
        probe = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            cwd=case_dir, capture_output=True, text=True,
        )
        remote_verb = "set-url" if probe.returncode == 0 else "add"
        configured = subprocess.run(
            ["git", "remote", remote_verb, "origin", auth_url],
            cwd=case_dir, capture_output=True, text=True, env=git_env,
        )
        if configured.returncode != 0:
            # Not fatal on its own: the push below reports the definitive result.
            logger.warning(
                "git remote %s failed for %s: %s",
                remote_verb, case_number, _redact(configured.stderr.strip()),
            )

        # Push
        push = subprocess.run(
            ["git", "push", "-u", "origin", "HEAD"],
            cwd=case_dir, capture_output=True, text=True, env=git_env,
        )
        if push.returncode != 0:
            stderr = _redact(push.stderr.strip())
            logger.warning("Gitea push failed for %s: %s", case_number, stderr)
            return {"ok": False, "url": html_url or None, "error": f"push_failed: {stderr[:200]}"}

        logger.info("Gitea repo created and pushed for %s", case_number)
        return {"ok": True, "url": html_url or None, "error": None}

    except httpx.HTTPStatusError as exc:
        msg = f"http_{exc.response.status_code}"
        logger.warning("Gitea setup failed for %s: %s", case_number, msg)
        return {"ok": False, "url": None, "error": msg}
    except Exception as exc:
        # Deliberate catch-all: this function must never raise (see docstring).
        detail = _redact(f"{type(exc).__name__}: {exc}")
        logger.warning("Gitea setup failed for %s: %s", case_number, detail)
        return {"ok": False, "url": None, "error": detail[:200]}
|
||
|
||
|
||
async def case_create(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: str = "",
    notes: str = "",
    expected_outcome: str = "",
    practice_area: str = "appeals_committee",
    appeal_subtype: str = "",
) -> str:
    """Create a new appeal case.

    Creates the database record, lays out the case directory on disk
    (``documents/{originals,extracted,proofread,backup}``, ``drafts``,
    ``case.json``, ``notes.md``), makes an initial git commit (best-effort)
    and then tries to create and push to a Gitea remote.

    Args:
        case_number: Appeal case number (e.g. 123-24).
        title: Short title of the appeal.
        appellants: Names of the appellants.
        respondents: Names of the respondents.
        subject: Subject of the appeal.
        property_address: Address of the property.
        permit_number: Permit number.
        committee_type: Committee type (defaults to the local committee).
        hearing_date: Hearing date (YYYY-MM-DD).
        notes: Free-form notes.
        expected_outcome: Expected outcome
            (rejection/partial_acceptance/full_acceptance/betterment_levy).
        practice_area: Legal practice area
            (appeals_committee / national_insurance / labor_law).
        appeal_subtype: Appeal subtype
            (building_permit / betterment_levy / compensation_197).
            Empty = derived automatically from the case number.

    Returns:
        The created case as a JSON string, including a ``gitea`` key holding
        the result dict of the remote setup.

    Raises:
        ValueError: If ``hearing_date`` is not a valid ISO date.
    """
    from datetime import date as date_type

    h_date = date_type.fromisoformat(hearing_date) if hearing_date else None

    # Resolve appeal_subtype: explicit override > auto-derive > 'unknown'
    derived_subtype = pa.derive_subtype(case_number, practice_area)
    if not appeal_subtype:
        appeal_subtype = derived_subtype
    pa.validate(practice_area, appeal_subtype)

    case = await db.create_case(
        case_number=case_number,
        title=title,
        appellants=appellants,
        respondents=respondents,
        subject=subject,
        property_address=property_address,
        permit_number=permit_number,
        committee_type=committee_type,
        hearing_date=h_date,
        notes=notes,
        expected_outcome=expected_outcome,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
    )

    # If the user overrode the case-number convention (e.g. case 8500 marked
    # as building_permit), record it so we can audit later.
    if pa.is_override(case_number, practice_area, appeal_subtype):
        await audit.log_action(
            action="case_subtype_override",
            case_id=UUID(case["id"]),
            details={
                "case_number": case_number,
                "derived_subtype": derived_subtype,
                "chosen_subtype": appeal_subtype,
                "practice_area": practice_area,
            },
        )

    # Lay out the on-disk directory structure for the case.
    case_dir = config.find_case_dir(case_number)
    case_dir.mkdir(parents=True, exist_ok=True)
    docs_dir = case_dir / "documents"
    docs_dir.mkdir(exist_ok=True)
    for subdir in ("originals", "extracted", "proofread", "backup"):
        (docs_dir / subdir).mkdir(exist_ok=True)
    (case_dir / "drafts").mkdir(exist_ok=True)

    # Save case metadata. The content is non-ASCII (ensure_ascii=False), so
    # the encoding is pinned instead of depending on the process locale.
    case_json = case_dir / "case.json"
    case_json.write_text(
        json.dumps(case, default=str, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    # Create notes file
    notes_file = case_dir / "notes.md"
    notes_file.write_text(f"# הערות - תיק {case_number}\n\n{notes}\n", encoding="utf-8")

    # Initialize git repo (best-effort). PATH is inherited from the process,
    # matching _setup_gitea_remote, so a git binary outside /usr/bin is found.
    git_env = {
        "GIT_AUTHOR_NAME": "Ezer Mishpati",
        "GIT_AUTHOR_EMAIL": "legal@local",
        "GIT_COMMITTER_NAME": "Ezer Mishpati",
        "GIT_COMMITTER_EMAIL": "legal@local",
        "PATH": os.environ.get("PATH", "/usr/bin:/bin"),
    }
    try:
        subprocess.run(["git", "init"], cwd=case_dir, capture_output=True)
        subprocess.run(["git", "add", "."], cwd=case_dir, capture_output=True)
        subprocess.run(
            ["git", "commit", "-m", f"אתחול תיק {case_number}: {title}"],
            cwd=case_dir,
            capture_output=True,
            env=git_env,
        )
    except Exception as exc:
        # git not available — non-critical, but leave a trace for debugging.
        logger.warning("git init skipped for case %s: %s", case_number, exc)

    # Create Gitea repo and configure remote — surface result so callers can
    # show failures (e.g. stale token) and offer a retry button instead of
    # silently producing a case with no remote.
    case["gitea"] = await _setup_gitea_remote(case_number, title, case_dir)

    return json.dumps(case, default=str, ensure_ascii=False, indent=2)
|
||
|
||
|
||
async def case_list(status: str = "", limit: int = 50) -> str:
    """List appeal cases, optionally filtered by status.

    Args:
        status: Status to filter by (new, in_progress, drafted, reviewed,
            final). Empty = all cases.
        limit: Maximum number of results.

    Returns:
        The matching cases as a JSON string, or a short Hebrew "no cases"
        message when nothing matches.
    """
    status_filter = status if status else None
    rows = await db.list_cases(status=status_filter, limit=limit)
    if rows:
        return json.dumps(rows, default=str, ensure_ascii=False, indent=2)
    return "אין תיקים."
|
||
|
||
|
||
async def case_get(case_number: str) -> str:
    """Return the full details of a case, including its list of documents.

    Args:
        case_number: Appeal case number.

    Returns:
        The case record as a JSON string with an added ``documents`` key, or
        a Hebrew "not found" message when no case has this number.
    """
    record = await db.get_case_by_number(case_number)
    if not record:
        return f"תיק {case_number} לא נמצא."

    # Attach the case's documents to the record before serializing it.
    record["documents"] = await db.list_documents(UUID(record["id"]))
    return json.dumps(record, default=str, ensure_ascii=False, indent=2)
|
||
|
||
|
||
async def case_update(
    case_number: str,
    status: str = "",
    title: str = "",
    subject: str = "",
    notes: str = "",
    hearing_date: str = "",
    decision_date: str = "",
    tags: list[str] | None = None,
    expected_outcome: str = "",
) -> str:
    """Update the details of a case.

    Only non-empty arguments are applied (``tags`` is applied whenever it is
    not ``None``, so an empty list clears the tags). A status change that
    would move the case backwards in the workflow order is ignored; statuses
    unknown to that order are always applied.

    Args:
        case_number: Appeal case number.
        status: New status (new, in_progress, drafted, reviewed, final).
        title: New title.
        subject: New subject.
        notes: New notes.
        hearing_date: Hearing date (YYYY-MM-DD).
        decision_date: Decision date (YYYY-MM-DD).
        tags: Tags.
        expected_outcome: Expected outcome
            (rejection/partial_acceptance/full_acceptance/betterment_levy).

    Returns:
        The updated case as a JSON string, or a Hebrew "not found" message
        when no case has this number.

    Raises:
        ValueError: If ``hearing_date`` or ``decision_date`` is not a valid
            ISO date.
    """
    from datetime import date as date_type

    # Ordered workflow statuses — regression protection
    STATUS_ORDER = [
        "new", "uploading", "processing", "documents_ready",
        "analyst_verified", "research_complete", "outcome_set",
        "brainstorming", "direction_approved", "analysis_enriched", "ready_for_writing",
        "drafting", "qa_review", "drafted",
        "exported", "reviewed", "final",
    ]

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    fields = {}
    if status:
        current = case.get("status", "")
        cur_idx = STATUS_ORDER.index(current) if current in STATUS_ORDER else -1
        new_idx = STATUS_ORDER.index(status) if status in STATUS_ORDER else -1
        # Only update if advancing or status is unknown to the order
        if new_idx >= cur_idx or new_idx == -1:
            fields["status"] = status
        else:
            logger.info(
                "case_update: ignoring status regression %s -> %s for %s",
                current, status, case_number,
            )
    if title:
        fields["title"] = title
    if subject:
        fields["subject"] = subject
    if notes:
        fields["notes"] = notes
    if hearing_date:
        fields["hearing_date"] = date_type.fromisoformat(hearing_date)
    if decision_date:
        fields["decision_date"] = date_type.fromisoformat(decision_date)
    if tags is not None:
        fields["tags"] = tags
    if expected_outcome:
        fields["expected_outcome"] = expected_outcome

    updated = await db.update_case(UUID(case["id"]), **fields)

    # Git commit + push the update (best-effort)
    try:
        case_dir = config.find_case_dir(case_number)
        if case_dir.exists():
            case_json = case_dir / "case.json"
            # Non-ASCII content: pin the encoding rather than rely on the locale.
            case_json.write_text(
                json.dumps(updated, default=str, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
            git_sync.commit_and_push(case_dir, f"עדכון תיק: {', '.join(fields)}")
    except Exception as exc:
        # Non-critical (e.g. git not available), but leave a trace for debugging.
        logger.warning("case_update: git sync failed for %s: %s", case_number, exc)

    return json.dumps(updated, default=str, ensure_ascii=False, indent=2)
|
||
|
||
|
||
async def case_delete(case_number: str, remove_files: bool = False) -> str:
    """Delete an appeal case.

    Removes the case from the database (per the original note, with a cascade
    to all its documents and claims) and optionally deletes its directory on
    disk.

    Args:
        case_number: Appeal case number.
        remove_files: Whether to also delete the on-disk directory (drafts,
            git repo). Defaults to False — the DB record is deleted but the
            files are kept as a backup.

    Returns:
        A JSON string. When the case does not exist: ``deleted`` (False) and
        ``reason``. Otherwise: ``deleted``, ``case_number``, ``case_id`` and
        ``removed_files``, which is True only if the directory is actually
        gone after the removal attempt.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return json.dumps(
            {"deleted": False, "reason": f"תיק {case_number} לא נמצא."},
            ensure_ascii=False,
        )

    case_id = UUID(case["id"])
    ok = await db.delete_case(case_id)

    result = {
        "deleted": ok,
        "case_number": case_number,
        "case_id": str(case_id),
        "removed_files": False,
    }

    if ok and remove_files:
        case_dir = config.find_case_dir(case_number)
        if case_dir.exists():
            shutil.rmtree(case_dir, ignore_errors=True)
            # rmtree swallows errors, so verify instead of assuming success.
            result["removed_files"] = not case_dir.exists()
            if not result["removed_files"]:
                logger.warning(
                    "case_delete: could not fully remove %s for case %s",
                    case_dir, case_number,
                )

    return json.dumps(result, ensure_ascii=False, indent=2)
|
||
|
||
|
||
async def case_get_final_text(case_number: str, max_chars: int = 0) -> str:
    """Return the text of the case's final decision file from ``exports``.

    The final decision is a plain file in the case's ``exports`` directory
    named ``סופי-{case_number}`` plus an extension (per the original note it
    is created by ``api_mark_final``), not a row in the ``documents`` table
    that ``document_get_text`` works on. Extensions are tried in order —
    ``.docx`` first, then ``.pdf``, ``.doc``, ``.rtf``, ``.txt``, ``.md`` —
    and the first existing file is passed to ``extractor.extract_text``.

    Args:
        case_number: Appeal case number.
        max_chars: If > 0, truncate the returned text to this length.
            0 = everything.

    Returns:
        A JSON string whose ``status`` is one of:
          * ``"ok"`` — with ``file_path``, ``text_length`` (length of the
            returned, possibly truncated, text), ``page_count``,
            ``truncated`` and ``text``;
          * ``"not_found"`` — with ``expected_path``, ``tried_extensions``
            and a ``hint``;
          * ``"error"`` — extraction raised; with ``file_path`` and ``error``.
    """
    # Single source of truth for both the lookup order and the not_found report.
    extensions = (".docx", ".pdf", ".doc", ".rtf", ".txt", ".md")

    case_dir = config.find_case_dir(case_number)
    exports_dir = case_dir / "exports"
    final_stem = f"סופי-{case_number}"

    # is_file() rather than exists(): a directory with a matching name is not
    # a decision file and would only fail later in the extractor.
    final_path = next(
        (
            candidate
            for candidate in (exports_dir / f"{final_stem}{ext}" for ext in extensions)
            if candidate.is_file()
        ),
        None,
    )

    if final_path is None:
        return json.dumps({
            "status": "not_found",
            "case_number": case_number,
            "expected_path": str(exports_dir / f"{final_stem}.docx"),
            "tried_extensions": list(extensions),
            "hint": (
                "ההחלטה הסופית עדיין לא סומנה כ'סופית' ב-UI. "
                "דפנה צריכה ללחוץ 'סמן כסופי' על קובץ הטיוטה הנכון."
            ),
        }, ensure_ascii=False, indent=2)

    try:
        text, page_count, _ = await extractor.extract_text(str(final_path))
    except Exception as e:
        logger.exception("case_get_final_text: extraction failed for %s", case_number)
        return json.dumps({
            "status": "error",
            "case_number": case_number,
            "file_path": str(final_path),
            "error": str(e),
        }, ensure_ascii=False, indent=2)

    text = text or ""
    truncated = max_chars > 0 and len(text) > max_chars
    if truncated:
        text = text[:max_chars]

    return json.dumps({
        "status": "ok",
        "case_number": case_number,
        "file_path": str(final_path),
        "text_length": len(text),
        "page_count": page_count,
        "truncated": truncated,
        "text": text,
    }, ensure_ascii=False, indent=2)
|