Files
legal-ai/mcp-server/src/legal_mcp/tools/cases.py
Chaim d359ab9884
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m40s
feat(proceeding-type): explicit ערר/בל"מ field for cases + corpus
Same case_number can exist as both a regular appeal (ערר) and an
extension-of-time request (בל"מ), and we were inferring the difference
from appeal_subtype prefixes — fragile, and case-number lookups
weren't disambiguated. Now stored as a first-class field on both
case_law (corpus) and cases (live cases), with partial unique indexes
on (case_number, proceeding_type).

- SCHEMA_V15: column + CHECK constraints + backfill from
  appeal_subtype LIKE 'extension_request_%' + partial unique indexes
  replace the old global UNIQUE(case_number).
- derive_proceeding_type() centralizes the inference rule
  (extension_request_* → בל"מ; subject regex fallback; default ערר).
- Metadata extractor prompt asks Claude to populate the new field
  explicitly; apply_to_record writes it for internal_committee rows.
- internal_decision_upload, case_create, case_update accept an
  optional proceeding_type; FastAPI request models expose it.
- Wizard + edit dialog get a sided Select; case header renders the
  resolved label (ערר / בל"מ).
- Uploaded the 2 staged בל"מ decisions on betterment levy:
  8126/24 (סופר נוח, 13 chunks), 8047/23 (הרנון, 48 chunks).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 09:17:33 +00:00

489 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""MCP tools for case management."""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
from pathlib import Path
from uuid import UUID
import httpx
from legal_mcp import config
from legal_mcp.services import audit, db, extractor, git_sync, practice_area as pa
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Gitea organization used in the API paths below: per-case repositories are
# created under it and looked up from it.
GITEA_ORG = "cases"
def _gitea_host() -> str:
    """Return the base URL of the Gitea server, without a trailing slash.

    Reads ``GITEA_HOST`` from the environment. An unset *or empty* variable
    falls back to the built-in default, and trailing slashes are dropped so
    callers can append ``/api/v1/...`` without producing ``//`` in the URL.
    """
    configured = os.environ.get("GITEA_HOST", "").strip()
    base = configured or "https://gitea.nautilus.marcusgroup.org"
    return base.rstrip("/")
def _gitea_token() -> str:
    """Return the Gitea API token taken from the environment.

    ``GITEA_ACCESS_TOKEN`` wins when it holds a non-empty value; otherwise
    ``GITEA_TOKEN`` is used. An empty string is returned when neither is set.
    """
    preferred = os.environ.get("GITEA_ACCESS_TOKEN")
    if preferred:
        return preferred
    return os.environ.get("GITEA_TOKEN", "")
async def _setup_gitea_remote(case_number: str, title: str, case_dir: Path) -> dict:
    """Create (or reuse) the Gitea repo for a case and push the local repo to it.

    Steps: create the repository under ``GITEA_ORG`` via the Gitea API (on a
    409 response the existing repository is fetched instead), point the local
    ``origin`` remote at it with token authentication, then push ``HEAD``.

    Args:
        case_number: Case number; used verbatim as the repository name.
        title: Case title; used in the repository description.
        case_dir: Local case directory in which the git commands run.

    Returns:
        A dict with ``ok`` (bool), ``url`` (str | None) and ``error``
        (str | None). Failures are reported through the dict so callers can
        surface them to the UI instead of silently swallowing them; every
        error string has the access token redacted.
    """
    token = _gitea_token()
    if not token:
        logger.info("No GITEA_TOKEN — skipping Gitea repo creation for %s", case_number)
        return {"ok": False, "url": None, "error": "no_token"}

    def _redact(text: str) -> str:
        # The token is embedded in the remote URL below and git may echo that
        # URL in its error output; it must never reach the logs or the UI.
        return text.replace(token, "***")

    try:
        auth_header = {"Authorization": f"token {token}"}
        # NOTE(review): verify=False disables TLS certificate verification for
        # a request that carries the access token. Kept as-is because the
        # server's certificate setup is not visible from here — confirm and
        # prefer a CA bundle if possible.
        async with httpx.AsyncClient(verify=False, timeout=30) as client:
            resp = await client.post(
                f"{_gitea_host()}/api/v1/orgs/{GITEA_ORG}/repos",
                headers=auth_header,
                json={
                    "name": case_number,
                    "description": f"ערר {case_number}{title}"[:255],
                    "private": True,
                    "auto_init": False,
                },
            )
            if resp.status_code == 409:
                # Treated as "repository already exists": reuse it.
                resp2 = await client.get(
                    f"{_gitea_host()}/api/v1/repos/{GITEA_ORG}/{case_number}",
                    headers=auth_header,
                )
                resp2.raise_for_status()
                repo = resp2.json()
            else:
                resp.raise_for_status()
                repo = resp.json()

        clone_url = repo.get("clone_url", "")
        html_url = repo.get("html_url", "")
        if not clone_url:
            return {"ok": False, "url": None, "error": "no_clone_url"}

        # Inject credentials into the scheme prefix only (first occurrence).
        auth_url = clone_url.replace("https://", f"https://chaim:{token}@", 1)
        git_env = {
            "GIT_AUTHOR_NAME": "Ezer Mishpati",
            "GIT_AUTHOR_EMAIL": "legal@local",
            "GIT_COMMITTER_NAME": "Ezer Mishpati",
            "GIT_COMMITTER_EMAIL": "legal@local",
            "PATH": os.environ.get("PATH", "/usr/bin:/bin"),
        }

        # Add the remote, or update it when "origin" is already configured.
        existing = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            cwd=case_dir, capture_output=True, text=True,
        )
        remote_verb = "set-url" if existing.returncode == 0 else "add"
        subprocess.run(
            ["git", "remote", remote_verb, "origin", auth_url],
            cwd=case_dir, capture_output=True, env=git_env,
        )

        # Push
        push = subprocess.run(
            ["git", "push", "-u", "origin", "HEAD"],
            cwd=case_dir, capture_output=True, text=True, env=git_env,
        )
        if push.returncode != 0:
            stderr = _redact(push.stderr.strip())
            logger.warning("Gitea push failed for %s: %s", case_number, stderr)
            return {"ok": False, "url": html_url or None, "error": f"push_failed: {stderr[:200]}"}

        logger.info("Gitea repo created and pushed for %s", case_number)
        return {"ok": True, "url": html_url or None, "error": None}
    except httpx.HTTPStatusError as exc:
        msg = f"http_{exc.response.status_code}"
        logger.warning("Gitea setup failed for %s: %s", case_number, msg)
        return {"ok": False, "url": None, "error": msg}
    except Exception as exc:
        # Best-effort boundary: report instead of raising, with the token masked.
        detail = _redact(f"{type(exc).__name__}: {exc}")
        logger.warning("Gitea setup failed for %s: %s", case_number, detail)
        return {"ok": False, "url": None, "error": detail[:200]}
async def case_create(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: str = "",
    notes: str = "",
    expected_outcome: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    proceeding_type: str = "",
) -> str:
    """Create a new appeal case: DB row, case directory, git repo and Gitea remote.

    Args:
        case_number: Appeal case number (e.g. 123-24).
        title: Short title of the appeal.
        appellants: Names of the appellants.
        respondents: Names of the respondents.
        subject: Subject of the appeal.
        property_address: Address of the property.
        permit_number: Permit number.
        committee_type: Committee type (default: ועדה מקומית, local committee).
        hearing_date: Hearing date (YYYY-MM-DD).
        notes: Free-text notes.
        expected_outcome: Expected outcome
            (rejection/partial_acceptance/full_acceptance/betterment_levy).
        practice_area: Legal domain value (rishuy_uvniya / betterment_levy /
            compensation_197). Empty or "appeals_committee" = derived
            automatically from the case number.
        appeal_subtype: Appeal subtype (building_permit / betterment_levy /
            compensation_197). Empty = derived automatically.
        proceeding_type: 'ערר' (appeal) / 'בל"מ' (extension-of-time request).
            Empty = derived from appeal_subtype/subject.

    Returns:
        JSON string of the created case record, with an added ``gitea`` key
        holding the result dict of the Gitea remote setup.

    Raises:
        ValueError: If ``hearing_date`` is not an ISO date, or an explicit
            ``proceeding_type`` is not one of the two accepted values.
    """
    from datetime import date as date_type

    h_date = None
    if hearing_date:
        try:
            h_date = date_type.fromisoformat(hearing_date)
        except ValueError as exc:
            # Same message shape as case_update, so callers see which field failed.
            raise ValueError(f"Invalid hearing_date format: {hearing_date!r}") from exc

    # Auto-derive practice_area when missing or set to the legacy multi-tenant
    # value. The DB's cases_practice_area_check rejects 'appeals_committee',
    # so we MUST map it to a domain value before INSERT. If derivation fails
    # (unknown case number format), fall back to '' which the constraint allows.
    if not practice_area or practice_area == "appeals_committee":
        practice_area = pa.derive_domain_practice_area(case_number)

    # Resolve appeal_subtype: explicit override > auto-derive > 'unknown'.
    # derive_subtype_with_blam inspects the subject to detect בל"מ
    # (request for extension of time) and returns an extension_request_*
    # variant when appropriate. Falls back to regular derive_subtype when
    # subject is empty.
    derived_subtype = pa.derive_subtype_with_blam(case_number, subject, practice_area)
    if not appeal_subtype:
        appeal_subtype = derived_subtype
    pa.validate(practice_area, appeal_subtype)

    # proceeding_type: explicit override > derived from subtype/subject > 'ערר'.
    # An explicit value is checked here (as case_update does) so a typo fails
    # with a clear message before anything is written.
    explicit_proc = proceeding_type.strip()
    if explicit_proc and explicit_proc not in {"ערר", 'בל"מ'}:
        raise ValueError(
            f"proceeding_type לא תקין: {proceeding_type!r}. ערכים תקפים: ערר / בל\"מ"
        )
    resolved_proc = explicit_proc or pa.derive_proceeding_type(
        appeal_subtype=appeal_subtype, subject=subject,
    )

    case = await db.create_case(
        case_number=case_number,
        title=title,
        appellants=appellants,
        respondents=respondents,
        subject=subject,
        property_address=property_address,
        permit_number=permit_number,
        committee_type=committee_type,
        hearing_date=h_date,
        notes=notes,
        expected_outcome=expected_outcome,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        proceeding_type=resolved_proc,
    )

    # If the user overrode the case-number convention (e.g. case 8500 marked
    # as building_permit), record it so we can audit later.
    if pa.is_override(case_number, practice_area, appeal_subtype):
        await audit.log_action(
            action="case_subtype_override",
            case_id=UUID(case["id"]),
            details={
                "case_number": case_number,
                "derived_subtype": derived_subtype,
                "chosen_subtype": appeal_subtype,
                "practice_area": practice_area,
            },
        )

    # Lay out the case directory tree.
    case_dir = config.find_case_dir(case_number)
    case_dir.mkdir(parents=True, exist_ok=True)
    docs_dir = case_dir / "documents"
    docs_dir.mkdir(exist_ok=True)
    for sub in ("originals", "extracted", "proofread", "backup"):
        (docs_dir / sub).mkdir(exist_ok=True)
    (case_dir / "drafts").mkdir(exist_ok=True)

    # Save case metadata. The content is non-ASCII (ensure_ascii=False), so the
    # encoding is pinned rather than left to the process locale.
    case_json = case_dir / "case.json"
    case_json.write_text(
        json.dumps(case, default=str, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    # Create notes file
    notes_file = case_dir / "notes.md"
    notes_file.write_text(f"# הערות - תיק {case_number}\n\n{notes}\n", encoding="utf-8")

    # Initialize git repo (best-effort): failures are logged, never raised.
    git_env = {
        "GIT_AUTHOR_NAME": "Ezer Mishpati",
        "GIT_AUTHOR_EMAIL": "legal@local",
        "GIT_COMMITTER_NAME": "Ezer Mishpati",
        "GIT_COMMITTER_EMAIL": "legal@local",
        "PATH": os.environ.get("PATH", "/usr/bin:/bin"),
    }
    git_steps = (
        (["git", "init"], None),
        (["git", "add", "."], None),
        (["git", "commit", "-m", f"אתחול תיק {case_number}: {title}"], git_env),
    )
    try:
        for cmd, env in git_steps:
            proc = subprocess.run(cmd, cwd=case_dir, capture_output=True, env=env)
            if proc.returncode != 0:
                logger.warning(
                    "%s failed for case %s: %s",
                    " ".join(cmd[:2]),
                    case_number,
                    proc.stderr.decode("utf-8", "replace").strip()[:200],
                )
                # Later steps depend on the earlier ones; stop at the first failure.
                break
    except Exception as exc:
        # git not available — non-critical
        logger.warning("git init skipped for case %s: %s", case_number, exc)

    # Create Gitea repo and configure remote — surface result so callers can
    # show failures (e.g. stale token) and offer a retry button instead of
    # silently producing a case with no remote.
    case["gitea"] = await _setup_gitea_remote(case_number, title, case_dir)
    return json.dumps(case, default=str, ensure_ascii=False, indent=2)
async def case_list(status: str = "", limit: int = 50) -> str:
    """List appeal cases, optionally filtered by status.

    Args:
        status: Status filter (new, processing, proofread, documents_ready,
            analyst_verified, research_complete, outcome_set,
            direction_pending, direction_approved, analysis_enriched,
            ready_for_writing, drafted, qa_passed, qa_failed, exported,
            done). Empty = all cases.
        limit: Maximum number of results.

    Returns:
        JSON string of the matching cases, or a short Hebrew "no cases"
        message when nothing matched.
    """
    status_filter = status if status else None
    rows = await db.list_cases(status=status_filter, limit=limit)
    if rows:
        return json.dumps(rows, default=str, ensure_ascii=False, indent=2)
    return "אין תיקים."
async def case_get(case_number: str) -> str:
    """Fetch the full details of a case, including its document list.

    Args:
        case_number: Appeal case number.

    Returns:
        JSON string of the case record with a ``documents`` key added, or a
        Hebrew "not found" message when no such case exists.
    """
    record = await db.get_case_by_number(case_number)
    if record:
        record["documents"] = await db.list_documents(UUID(record["id"]))
        return json.dumps(record, default=str, ensure_ascii=False, indent=2)
    return f"תיק {case_number} לא נמצא."
async def case_update(
    case_number: str,
    status: str = "",
    title: str = "",
    subject: str = "",
    notes: str = "",
    hearing_date: str = "",
    decision_date: str = "",
    tags: list[str] | None = None,
    expected_outcome: str = "",
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    property_address: str = "",
    permit_number: str = "",
    proceeding_type: str = "",
) -> str:
    """Update fields of an existing case.

    Empty strings and ``None`` mean "leave unchanged"; only supplied fields
    are sent to the database.

    Args:
        case_number: Appeal case number.
        status: New workflow status. Statuses listed in ``STATUS_ORDER`` below
            may only stay or move forward — a request to move backwards is
            silently ignored. Statuses not in that list are always applied.
        title: New title.
        subject: New subject.
        notes: New notes.
        hearing_date: Hearing date (YYYY-MM-DD).
        decision_date: Decision date (YYYY-MM-DD).
        tags: Tags (an empty list clears them; ``None`` leaves them as is).
        expected_outcome: Expected outcome
            (rejection/partial_acceptance/full_acceptance/betterment_levy).
        appellants: New list of appellants.
        respondents: New list of respondents.
        property_address: New property address.
        permit_number: New plan/application number.
        proceeding_type: 'ערר' / 'בל"מ' — empty = no change.

    Returns:
        JSON string of the updated case record, or a Hebrew "not found"
        message when no such case exists.

    Raises:
        ValueError: If a date is not in ISO format or ``proceeding_type`` is
            not one of the two accepted values.
    """
    from datetime import date as date_type

    # Ordered workflow statuses — regression protection
    STATUS_ORDER = [
        "new", "uploading", "processing", "documents_ready",
        "analyst_verified", "research_complete", "outcome_set",
        "brainstorming", "direction_approved", "analysis_enriched", "ready_for_writing",
        "drafting", "qa_review", "drafted",
        "exported", "reviewed", "final",
    ]

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    fields: dict = {}
    if status:
        current = case.get("status", "")
        cur_idx = STATUS_ORDER.index(current) if current in STATUS_ORDER else -1
        new_idx = STATUS_ORDER.index(status) if status in STATUS_ORDER else -1
        # Only update if advancing or status is unknown to the order
        if new_idx >= cur_idx or new_idx == -1:
            fields["status"] = status
    if title:
        fields["title"] = title
    if subject:
        fields["subject"] = subject
    if notes:
        fields["notes"] = notes
    if hearing_date:
        try:
            fields["hearing_date"] = date_type.fromisoformat(hearing_date)
        except ValueError as exc:
            raise ValueError(f"Invalid hearing_date format: {hearing_date!r}") from exc
    if decision_date:
        try:
            fields["decision_date"] = date_type.fromisoformat(decision_date)
        except ValueError as exc:
            raise ValueError(f"Invalid decision_date format: {decision_date!r}") from exc
    if tags is not None:
        fields["tags"] = tags
    if expected_outcome:
        fields["expected_outcome"] = expected_outcome
    if appellants is not None:
        fields["appellants"] = appellants
    if respondents is not None:
        fields["respondents"] = respondents
    if property_address:
        fields["property_address"] = property_address
    if permit_number:
        fields["permit_number"] = permit_number

    # Surrounding whitespace is dropped before validation, matching case_create.
    proc = proceeding_type.strip()
    if proc:
        if proc not in {"ערר", 'בל"מ'}:
            raise ValueError(
                f"proceeding_type לא תקין: {proceeding_type!r}. ערכים תקפים: ערר / בל\"מ"
            )
        fields["proceeding_type"] = proc

    # NOTE(review): when nothing was supplied (or a status regression was
    # ignored) `fields` is empty and update_case is still called — confirm
    # db.update_case handles an empty field set.
    updated = await db.update_case(UUID(case["id"]), **fields)

    # Mirror the update to case.json and git (best-effort): a failure here is
    # logged but never fails the update, which is already stored in the DB.
    try:
        case_dir = config.find_case_dir(case_number)
        if case_dir.exists():
            case_json = case_dir / "case.json"
            # Non-ASCII content: pin the encoding instead of using the locale's.
            case_json.write_text(
                json.dumps(updated, default=str, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
            git_sync.commit_and_push(case_dir, f"עדכון תיק: {', '.join(fields.keys())}")
    except Exception as exc:
        logger.warning("case_update: file/git sync failed for %s: %s", case_number, exc)

    return json.dumps(updated, default=str, ensure_ascii=False, indent=2)
async def case_delete(case_number: str, remove_files: bool = False) -> str:
    """Delete an appeal case from the database.

    The original docstring states that the DB delete cascades to the case's
    documents and claims; that behavior lives in ``db.delete_case``.

    Args:
        case_number: Appeal case number.
        remove_files: Whether to also delete the on-disk case directory
            (drafts, git repo). Default False — the DB row is removed but the
            files are kept as a backup.

    Returns:
        JSON string with ``deleted``, ``case_number``, ``case_id`` and
        ``removed_files``; or ``deleted: false`` plus a ``reason`` when the
        case does not exist. ``removed_files`` is true only when the
        directory is actually gone after the removal attempt.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return json.dumps(
            {"deleted": False, "reason": f"תיק {case_number} לא נמצא."},
            ensure_ascii=False,
        )

    case_id = UUID(case["id"])
    ok = await db.delete_case(case_id)
    result = {
        "deleted": ok,
        "case_number": case_number,
        "case_id": str(case_id),
        "removed_files": False,
    }

    if ok and remove_files:
        case_dir = config.find_case_dir(case_number)
        if case_dir.exists():
            # rmtree swallows errors here, so verify the outcome instead of
            # assuming success.
            shutil.rmtree(case_dir, ignore_errors=True)
            removed = not case_dir.exists()
            if not removed:
                logger.warning(
                    "case_delete: could not fully remove %s for case %s",
                    case_dir, case_number,
                )
            result["removed_files"] = removed

    return json.dumps(result, ensure_ascii=False, indent=2)
async def case_get_final_text(case_number: str, max_chars: int = 0) -> str:
    """Read the text of the final decision file (`סופי-{case}.docx` under exports).

    Unlike `document_get_text`, which works on rows of the `documents` table,
    the final decision is just a file in the case's exports directory (per the
    original note, created by `api_mark_final`). The file is looked up by
    extension in this order: `.docx`, `.pdf`, `.doc`, `.rtf`, `.txt`, `.md`.

    Args:
        case_number: Appeal case number.
        max_chars: If > 0, truncate the returned text to this length. 0 = all.

    Returns:
        JSON string whose ``status`` is ``ok`` (with the text and metadata),
        ``not_found`` (no final file exists) or ``error`` (extraction failed).
    """
    extensions = [".docx", ".pdf", ".doc", ".rtf", ".txt", ".md"]
    exports_dir = config.find_case_dir(case_number) / "exports"
    final_stem = f"סופי-{case_number}"

    # First existing candidate in priority order, or None.
    candidates = (exports_dir / f"{final_stem}{ext}" for ext in extensions)
    final_path = next((path for path in candidates if path.exists()), None)

    if final_path is None:
        not_found = {
            "status": "not_found",
            "case_number": case_number,
            "expected_path": str(exports_dir / f"{final_stem}.docx"),
            "tried_extensions": extensions,
            "hint": (
                "ההחלטה הסופית עדיין לא סומנה כ'סופית' ב-UI. "
                "דפנה צריכה ללחוץ 'סמן כסופי' על קובץ הטיוטה הנכון."
            ),
        }
        return json.dumps(not_found, ensure_ascii=False, indent=2)

    try:
        text, page_count, _ = await extractor.extract_text(str(final_path))
    except Exception as e:
        logger.exception("case_get_final_text: extraction failed for %s", case_number)
        failure = {
            "status": "error",
            "case_number": case_number,
            "file_path": str(final_path),
            "error": str(e),
        }
        return json.dumps(failure, ensure_ascii=False, indent=2)

    text = text or ""
    truncated = max_chars > 0 and len(text) > max_chars
    if truncated:
        text = text[:max_chars]

    # text_length reflects the (possibly truncated) text that is returned.
    payload = {
        "status": "ok",
        "case_number": case_number,
        "file_path": str(final_path),
        "text_length": len(text),
        "page_count": page_count,
        "truncated": truncated,
        "text": text,
    }
    return json.dumps(payload, ensure_ascii=False, indent=2)