Initial commit: MCP server + web upload interface
Ezer Mishpati - AI legal decision drafting system with:
- MCP server (FastMCP) with document processing pipeline
- Web upload interface (FastAPI) for file upload and classification
- pgvector-based semantic search
- Hebrew legal document chunking and embedding
This commit is contained in:
0
mcp-server/src/legal_mcp/tools/__init__.py
Normal file
0
mcp-server/src/legal_mcp/tools/__init__.py
Normal file
177
mcp-server/src/legal_mcp/tools/cases.py
Normal file
177
mcp-server/src/legal_mcp/tools/cases.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""MCP tools for case management."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import db
|
||||
|
||||
|
||||
async def case_create(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: str = "",
    notes: str = "",
) -> str:
    """Create a new appeal case and set up its on-disk git repository.

    The case is stored through ``db.create_case``; afterwards a directory
    named after the case number is created under ``config.CASES_DIR`` with
    ``documents/`` and ``drafts/`` subdirectories, a ``case.json`` snapshot
    and a ``notes.md`` file, and an initial git commit is attempted.

    Args:
        case_number: Appeal case number (for example: 123-24).
        title: Short title of the appeal.
        appellants: Names of the appellants.
        respondents: Names of the respondents.
        subject: Subject of the appeal.
        property_address: Address of the property.
        permit_number: Permit number.
        committee_type: Committee type (defaults to the local committee).
        hearing_date: Hearing date (YYYY-MM-DD); empty means not set.
        notes: Free-form notes.

    Returns:
        The created case record serialized as a JSON string.

    Raises:
        ValueError: If ``hearing_date`` is non-empty and not a valid ISO date.
    """
    from datetime import date as date_type

    h_date = date_type.fromisoformat(hearing_date) if hearing_date else None

    case = await db.create_case(
        case_number=case_number,
        title=title,
        appellants=appellants,
        respondents=respondents,
        subject=subject,
        property_address=property_address,
        permit_number=permit_number,
        committee_type=committee_type,
        hearing_date=h_date,
        notes=notes,
    )

    # Directory layout for the case.
    case_dir = config.CASES_DIR / case_number
    case_dir.mkdir(parents=True, exist_ok=True)
    (case_dir / "documents").mkdir(exist_ok=True)
    (case_dir / "drafts").mkdir(exist_ok=True)

    case_payload = json.dumps(case, default=str, ensure_ascii=False, indent=2)

    # The payload and the notes contain raw Hebrew (ensure_ascii=False), so the
    # encoding is pinned to UTF-8 instead of relying on the locale default.
    (case_dir / "case.json").write_text(case_payload, encoding="utf-8")
    (case_dir / "notes.md").write_text(
        f"# הערות - תיק {case_number}\n\n{notes}\n", encoding="utf-8"
    )

    # Best-effort git setup: return codes are deliberately not checked, so a
    # missing or failing git does not fail case creation.
    git_env = {
        "GIT_AUTHOR_NAME": "Ezer Mishpati",
        "GIT_AUTHOR_EMAIL": "legal@local",
        "GIT_COMMITTER_NAME": "Ezer Mishpati",
        "GIT_COMMITTER_EMAIL": "legal@local",
        "PATH": "/usr/bin:/bin",
    }
    subprocess.run(["git", "init"], cwd=case_dir, capture_output=True)
    subprocess.run(["git", "add", "."], cwd=case_dir, capture_output=True)
    subprocess.run(
        ["git", "commit", "-m", f"אתחול תיק {case_number}: {title}"],
        cwd=case_dir,
        capture_output=True,
        env=git_env,
    )

    return case_payload
|
||||
|
||||
|
||||
async def case_list(status: str = "", limit: int = 50) -> str:
    """List appeal cases, optionally filtered by status.

    Args:
        status: Status filter (new, in_progress, drafted, reviewed, final).
            Empty means all cases.
        limit: Maximum number of results.

    Returns:
        The cases as a JSON string, or a short Hebrew "no cases" message when
        the query returns nothing.
    """
    status_filter = status if status else None
    found = await db.list_cases(status=status_filter, limit=limit)
    if found:
        return json.dumps(found, default=str, ensure_ascii=False, indent=2)
    return "אין תיקים."
|
||||
|
||||
|
||||
async def case_get(case_number: str) -> str:
    """Return the full details of a case, including its document list.

    Args:
        case_number: Appeal case number.

    Returns:
        The case record (with a ``documents`` entry added) as a JSON string,
        or a Hebrew "case not found" message.
    """
    record = await db.get_case_by_number(case_number)
    if record:
        # Attach the case's documents under the "documents" key.
        record["documents"] = await db.list_documents(UUID(record["id"]))
        return json.dumps(record, default=str, ensure_ascii=False, indent=2)
    return f"תיק {case_number} לא נמצא."
|
||||
|
||||
|
||||
async def case_update(
    case_number: str,
    status: str = "",
    title: str = "",
    subject: str = "",
    notes: str = "",
    hearing_date: str = "",
    decision_date: str = "",
    tags: list[str] | None = None,
) -> str:
    """Update details of an existing case.

    Empty-string arguments are skipped (the field is left unchanged); ``tags``
    is applied whenever it is not ``None``, so an empty list is sent as-is.

    Args:
        case_number: Appeal case number.
        status: New status (new, in_progress, drafted, reviewed, final).
        title: New title.
        subject: New subject.
        notes: New notes.
        hearing_date: Hearing date (YYYY-MM-DD).
        decision_date: Decision date (YYYY-MM-DD).
        tags: Tags.

    Returns:
        The updated case record as a JSON string, or a Hebrew "case not found"
        message.

    Raises:
        ValueError: If a supplied date is not a valid ISO date.
    """
    from datetime import date as date_type

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    # Key order matters: it is echoed in the git commit message below.
    fields = {
        name: value
        for name, value in (
            ("status", status),
            ("title", title),
            ("subject", subject),
            ("notes", notes),
        )
        if value
    }
    if hearing_date:
        fields["hearing_date"] = date_type.fromisoformat(hearing_date)
    if decision_date:
        fields["decision_date"] = date_type.fromisoformat(decision_date)
    if tags is not None:
        fields["tags"] = tags

    updated = await db.update_case(UUID(case["id"]), **fields)
    updated_json = json.dumps(updated, default=str, ensure_ascii=False, indent=2)

    # Mirror the update into the case repository when it exists on disk.
    case_dir = config.CASES_DIR / case_number
    if case_dir.exists():
        # Raw Hebrew is written (ensure_ascii=False), so pin UTF-8 rather than
        # depending on the locale's default encoding.
        (case_dir / "case.json").write_text(updated_json, encoding="utf-8")
        # Best-effort commit: return codes are deliberately not checked.
        subprocess.run(["git", "add", "case.json"], cwd=case_dir, capture_output=True)
        subprocess.run(
            ["git", "commit", "-m", f"עדכון תיק: {', '.join(fields.keys())}"],
            cwd=case_dir,
            capture_output=True,
            env={
                "GIT_AUTHOR_NAME": "Ezer Mishpati",
                "GIT_AUTHOR_EMAIL": "legal@local",
                "GIT_COMMITTER_NAME": "Ezer Mishpati",
                "GIT_COMMITTER_EMAIL": "legal@local",
                "PATH": "/usr/bin:/bin",
            },
        )

    return updated_json
|
||||
218
mcp-server/src/legal_mcp/tools/documents.py
Normal file
218
mcp-server/src/legal_mcp/tools/documents.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""MCP tools for document management and processing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import db, processor
|
||||
|
||||
|
||||
async def document_upload(
    case_number: str,
    file_path: str,
    doc_type: str = "appeal",
    title: str = "",
) -> str:
    """Upload a document to an appeal case and process it.

    The file is copied into the case's ``documents`` directory, a document
    record is created, ``processor.process_document`` is run on it, and the
    change is committed to the case's git repository when that directory
    exists.

    Args:
        case_number: Appeal case number.
        file_path: Full path to the file (PDF, DOCX, RTF, TXT).
        doc_type: Document type (appeal, response, decision, reference,
            exhibit).
        title: Document name; when empty, the file name without its extension
            is used.

    Returns:
        A JSON string with the document record and the processing result, or
        a Hebrew error message when the case or the file is not found.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    source = Path(file_path)
    if not source.exists():
        return f"קובץ לא נמצא: {file_path}"

    case_id = UUID(case["id"])
    if not title:
        title = source.stem

    # Copy the file into the case directory.
    documents_dir = config.CASES_DIR / case_number / "documents"
    documents_dir.mkdir(parents=True, exist_ok=True)
    dest = documents_dir / source.name
    # Skip the copy when the file already lives at the destination:
    # shutil.copy2 raises SameFileError if source and destination are the
    # same file.
    if source.resolve() != dest.resolve():
        shutil.copy2(str(source), str(dest))

    doc = await db.create_document(
        case_id=case_id,
        doc_type=doc_type,
        title=title,
        file_path=str(dest),
    )

    result = await processor.process_document(UUID(doc["id"]), case_id)

    # Best-effort git commit; return codes are deliberately not checked.
    repo_dir = config.CASES_DIR / case_number
    if repo_dir.exists():
        subprocess.run(["git", "add", "."], cwd=repo_dir, capture_output=True)
        # Hebrew label for the commit message; unknown types are used verbatim.
        doc_type_hebrew = {
            "appeal": "כתב ערר",
            "response": "תשובה",
            "decision": "החלטה",
            "reference": "מסמך עזר",
            "exhibit": "נספח",
        }.get(doc_type, doc_type)
        subprocess.run(
            ["git", "commit", "-m", f"הוספת {doc_type_hebrew}: {title}"],
            cwd=repo_dir,
            capture_output=True,
            env={
                "GIT_AUTHOR_NAME": "Ezer Mishpati",
                "GIT_AUTHOR_EMAIL": "legal@local",
                "GIT_COMMITTER_NAME": "Ezer Mishpati",
                "GIT_COMMITTER_EMAIL": "legal@local",
                "PATH": "/usr/bin:/bin",
            },
        )

    return json.dumps(
        {"document": doc, "processing": result},
        default=str,
        ensure_ascii=False,
        indent=2,
    )
|
||||
|
||||
|
||||
async def document_upload_training(
    file_path: str,
    decision_number: str = "",
    decision_date: str = "",
    subject_categories: list[str] | None = None,
    title: str = "",
) -> str:
    """Upload one of Dafna's previous decisions to the style (training) corpus.

    The file is copied into ``config.TRAINING_DIR`` unless it is already
    there, its text is extracted and added to the style corpus, and, when
    chunking yields chunks, a case-less document record is created and its
    embedded chunks are stored.

    Args:
        file_path: Full path to the decision file.
        decision_number: Decision number.
        decision_date: Decision date (YYYY-MM-DD).
        subject_categories: One or more categories. The original Hebrew
            description listed: construction, non-conforming use, plan,
            permit, relaxation, subdivision, TAMA 38, betterment levy,
            section 197 compensation.
        title: Document name; defaults to the file name without extension.

    Returns:
        A JSON summary (corpus id, title, pages, text length, chunk count),
        or a Hebrew "file not found" message.

    Raises:
        ValueError: If ``decision_date`` is non-empty and not a valid ISO date.
    """
    from datetime import date as date_type

    from legal_mcp.services import extractor, embeddings, chunker

    source = Path(file_path)
    if not source.exists():
        return f"קובץ לא נמצא: {file_path}"

    title = title or source.stem

    # Place the file in the training directory unless it is already there.
    training_dir = config.TRAINING_DIR
    training_dir.mkdir(parents=True, exist_ok=True)
    dest = training_dir / source.name
    already_in_place = source.resolve() == dest.resolve()
    if not already_in_place:
        shutil.copy2(str(source), str(dest))

    text, page_count = await extractor.extract_text(str(dest))

    d_date = date_type.fromisoformat(decision_date) if decision_date else None

    corpus_id = await db.add_to_style_corpus(
        document_id=None,
        decision_number=decision_number,
        decision_date=d_date,
        subject_categories=subject_categories or [],
        full_text=text,
    )

    # Chunk and embed so the training corpus can be searched too.
    chunks = chunker.chunk_document(text)
    if chunks:
        # Document record without a case association.
        doc = await db.create_document(
            case_id=None,
            doc_type="decision",
            title=f"[קורפוס] {title}",
            file_path=str(dest),
            page_count=page_count,
        )
        doc_id = UUID(doc["id"])
        await db.update_document(
            doc_id, extracted_text=text, extraction_status="completed"
        )

        vectors = await embeddings.embed_texts(
            [chunk.content for chunk in chunks], input_type="document"
        )
        rows = []
        for chunk, vector in zip(chunks, vectors):
            rows.append(
                {
                    "content": chunk.content,
                    "section_type": chunk.section_type,
                    "embedding": vector,
                    "page_number": chunk.page_number,
                    "chunk_index": chunk.chunk_index,
                }
            )
        await db.store_chunks(doc_id, None, rows)

    summary = {
        "corpus_id": str(corpus_id),
        "title": title,
        "pages": page_count,
        "text_length": len(text),
        "chunks": len(chunks) if chunks else 0,
    }
    return json.dumps(summary, default=str, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def document_get_text(case_number: str, doc_title: str = "") -> str:
    """Return the extracted text of documents in a case.

    Args:
        case_number: Appeal case number.
        doc_title: Document name filter (case-insensitive substring match on
            the title); when empty, all documents of the case are returned.

    Returns:
        A JSON array with title, type and text per document (text cut to the
        first 10000 characters, or a Hebrew "no text" placeholder), or a
        Hebrew message when the case, its documents, or the requested
        document is not found.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    docs = await db.list_documents(UUID(case["id"]))
    if not docs:
        return f"אין מסמכים בתיק {case_number}."

    if doc_title:
        needle = doc_title.lower()
        docs = [entry for entry in docs if needle in entry["title"].lower()]
        if not docs:
            return f"מסמך '{doc_title}' לא נמצא בתיק."

    payload = []
    for entry in docs:
        body = await db.get_document_text(UUID(entry["id"]))
        shown = body[:10000] if body else "(ללא טקסט)"
        payload.append(
            {"title": entry["title"], "doc_type": entry["doc_type"], "text": shown}
        )

    return json.dumps(payload, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def document_list(case_number: str) -> str:
    """List the documents of a case.

    Args:
        case_number: Appeal case number.

    Returns:
        The document records as a JSON string, or a Hebrew message when the
        case does not exist or has no documents.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    documents = await db.list_documents(UUID(case["id"]))
    if documents:
        return json.dumps(documents, default=str, ensure_ascii=False, indent=2)
    return f"אין מסמכים בתיק {case_number}."
|
||||
202
mcp-server/src/legal_mcp/tools/drafting.py
Normal file
202
mcp-server/src/legal_mcp/tools/drafting.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""MCP tools for decision drafting support."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db, embeddings
|
||||
|
||||
|
||||
DECISION_TEMPLATE = """# החלטה
|
||||
|
||||
## בפני: דפנה תמיר, יו"ר ועדת הערר מחוז ירושלים
|
||||
|
||||
**ערר מספר:** {case_number}
|
||||
**נושא:** {subject}
|
||||
**העוררים:** {appellants}
|
||||
**המשיבים:** {respondents}
|
||||
**כתובת הנכס:** {property_address}
|
||||
|
||||
---
|
||||
|
||||
## א. רקע עובדתי
|
||||
|
||||
[תיאור הרקע העובדתי של הערר]
|
||||
|
||||
## ב. טענות העוררים
|
||||
|
||||
[סיכום טענות העוררים]
|
||||
|
||||
## ג. טענות המשיבים
|
||||
|
||||
[סיכום טענות המשיבים]
|
||||
|
||||
## ד. דיון והכרעה
|
||||
|
||||
[ניתוח משפטי]
|
||||
|
||||
## ה. מסקנה
|
||||
|
||||
[מסקנת הוועדה]
|
||||
|
||||
## ו. החלטה
|
||||
|
||||
[ההחלטה הסופית]
|
||||
|
||||
---
|
||||
ניתנה היום, {date}
|
||||
דפנה תמיר, יו"ר ועדת הערר
|
||||
"""
|
||||
|
||||
|
||||
async def get_style_guide() -> str:
    """Fetch Dafna's style patterns and render them as a Markdown guide.

    Patterns returned by ``db.get_style_patterns`` are grouped by their
    ``pattern_type``; each group becomes a section with a Hebrew heading
    (unknown types use the raw type name) listing text, context and frequency.

    Returns:
        The Markdown guide, or a Hebrew message when no patterns exist.
    """
    patterns = await db.get_style_patterns()
    if not patterns:
        return "לא נמצאו דפוסי סגנון. יש להעלות החלטות קודמות ולהריץ ניתוח סגנון (/style-report)."

    # Group in first-seen order of pattern_type.
    by_type: dict[str, list] = {}
    for pattern in patterns:
        by_type.setdefault(pattern["pattern_type"], []).append(
            {
                "text": pattern["pattern_text"],
                "context": pattern["context"],
                "frequency": pattern["frequency"],
            }
        )

    headings = {
        "opening_formula": "נוסחאות פתיחה",
        "transition": "ביטויי מעבר",
        "citation_style": "סגנון ציטוט",
        "analysis_structure": "מבנה ניתוח",
        "closing_formula": "נוסחאות סיום",
        "characteristic_phrase": "ביטויים אופייניים",
    }

    pieces = ["# מדריך סגנון - דפנה תמיר\n\n"]
    for ptype, entries in by_type.items():
        pieces.append(f"## {headings.get(ptype, ptype)}\n\n")
        pieces.extend(
            f"- **{entry['text']}** ({entry['context']}, תדירות: {entry['frequency']})\n"
            for entry in entries
        )
        pieces.append("\n")

    return "".join(pieces)
|
||||
|
||||
|
||||
async def draft_section(
    case_number: str,
    section: str,
    instructions: str = "",
) -> str:
    """Assemble the context needed to draft one section of a decision.

    Gathers similar chunks from the case's own documents, similar chunks of
    the same section type from other sources (precedents), and style patterns.

    Args:
        case_number: Appeal case number.
        section: Section type (facts, appellant_claims, respondent_claims,
            legal_analysis, conclusion, ruling). Unknown values are used
            verbatim as the search query.
        instructions: Additional drafting instructions, passed through.

    Returns:
        A JSON string with case details, the section, the instructions, up to
        10 case chunks, up to 3 precedent excerpts (500 characters each) and
        up to 15 style patterns; or a Hebrew "case not found" message.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    case_id = UUID(case["id"])

    # Hebrew search phrase per section type.
    queries_by_section = {
        "facts": "רקע עובדתי של התיק",
        "appellant_claims": "טענות העוררים",
        "respondent_claims": "טענות המשיבים",
        "legal_analysis": "ניתוח משפטי ודיון",
        "conclusion": "מסקנות",
        "ruling": "החלטה",
    }
    query_text = queries_by_section.get(section, section)
    query_emb = await embeddings.embed_query(query_text)

    # 1. Chunks from this case's documents.
    own_chunks = await db.search_similar(
        query_embedding=query_emb, limit=10, case_id=case_id
    )

    # 2. Same-section chunks from anywhere, minus those of this very case.
    candidates = await db.search_similar(
        query_embedding=query_emb, limit=5, section_type=section
    )
    foreign_chunks = [
        chunk for chunk in candidates if str(chunk["case_id"]) != case["id"]
    ]

    # 3. Style patterns.
    style_patterns = await db.get_style_patterns()

    case_summary = {
        key: case[key]
        for key in (
            "case_number",
            "title",
            "appellants",
            "respondents",
            "subject",
            "property_address",
        )
    }
    document_items = [
        {
            "document": chunk["document_title"],
            "section_type": chunk["section_type"],
            "content": chunk["content"],
        }
        for chunk in own_chunks
    ]
    precedent_items = [
        {
            "case_number": chunk["case_number"],
            "document": chunk["document_title"],
            "content": chunk["content"][:500],
        }
        for chunk in foreign_chunks[:3]
    ]
    pattern_items = [
        {"type": pattern["pattern_type"], "text": pattern["pattern_text"]}
        for pattern in style_patterns[:15]
    ]

    context = {
        "case": case_summary,
        "section": section,
        "instructions": instructions,
        "case_documents": document_items,
        "precedents": precedent_items,
        "style_patterns": pattern_items,
    }
    return json.dumps(context, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def get_decision_template(case_number: str) -> str:
    """Return the structural template of a full decision, filled with case details.

    Args:
        case_number: Appeal case number.

    Returns:
        ``DECISION_TEMPLATE`` formatted with the case's number, subject,
        parties, property address and today's date (DD.MM.YYYY), or a Hebrew
        "case not found" message.
    """
    from datetime import date

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    # `.get(key, default)` only covers a missing key. A key that is present
    # with value None would make str.join raise TypeError, or render the
    # literal "None" in the template, so fall back with `or`.
    appellants = case.get("appellants") or []
    respondents = case.get("respondents") or []

    return DECISION_TEMPLATE.format(
        case_number=case["case_number"],
        subject=case["subject"] or "",
        appellants=", ".join(appellants),
        respondents=", ".join(respondents),
        property_address=case.get("property_address") or "",
        date=date.today().strftime("%d.%m.%Y"),
    )
|
||||
|
||||
|
||||
async def analyze_style() -> str:
    """Run style analysis over the corpus of Dafna's decisions.

    Delegates to ``style_analyzer.analyze_corpus``. Per the original
    description it extracts writing patterns and stores them; that work
    happens inside the analyzer and is not visible here.

    Returns:
        The analyzer's result serialized as a JSON string.
    """
    from legal_mcp.services.style_analyzer import analyze_corpus

    report = await analyze_corpus()
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    return serialized
|
||||
124
mcp-server/src/legal_mcp/tools/search.py
Normal file
124
mcp-server/src/legal_mcp/tools/search.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""MCP tools for RAG search over legal documents and decisions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db, embeddings
|
||||
|
||||
|
||||
async def search_decisions(
    query: str,
    limit: int = 10,
    section_type: str = "",
) -> str:
    """Semantic search over previous decisions and documents.

    Args:
        query: Search query in Hebrew (for example: non-conforming commercial
            use in a residential zone).
        limit: Maximum number of results.
        section_type: Section-type filter (facts, legal_analysis, conclusion,
            ruling, etc.). Empty means all.

    Returns:
        A JSON array of hits (score rounded to 4 decimals, case number,
        document title, section, page, content), or a Hebrew "no results"
        message.
    """
    query_emb = await embeddings.embed_query(query)
    section_filter = section_type if section_type else None
    hits = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        section_type=section_filter,
    )
    if not hits:
        return "לא נמצאו תוצאות."

    formatted = [
        {
            "score": round(float(hit["score"]), 4),
            "case_number": hit["case_number"],
            "document": hit["document_title"],
            "section": hit["section_type"],
            "page": hit["page_number"],
            "content": hit["content"],
        }
        for hit in hits
    ]
    return json.dumps(formatted, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def search_case_documents(
    case_number: str,
    query: str,
    limit: int = 10,
) -> str:
    """Semantic search within the documents of one specific case.

    Args:
        case_number: Appeal case number.
        query: Search query.
        limit: Maximum number of results.

    Returns:
        A JSON array of hits (score rounded to 4 decimals, document title,
        section, page, content), or a Hebrew message when the case is not
        found or nothing matches.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    query_emb = await embeddings.embed_query(query)
    hits = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        case_id=UUID(case["id"]),
    )
    if not hits:
        return f"לא נמצאו תוצאות בתיק {case_number}."

    formatted = [
        {
            "score": round(float(hit["score"]), 4),
            "document": hit["document_title"],
            "section": hit["section_type"],
            "page": hit["page_number"],
            "content": hit["content"],
        }
        for hit in hits
    ]
    return json.dumps(formatted, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def find_similar_cases(
    description: str,
    limit: int = 5,
) -> str:
    """Find similar cases based on a free-text description.

    Args:
        description: Description of the case or topic (for example: an appeal
            against refusal of a building permit for an added floor).
        limit: Maximum number of results.

    Returns:
        A JSON array with the best-scoring hit per case number (score, case
        number, document title, first 500 characters of the matching chunk),
        or a Hebrew "no similar cases" message.
    """
    query_emb = await embeddings.embed_query(description)
    # Over-fetch so enough distinct cases remain after deduplication.
    hits = await db.search_similar(
        query_embedding=query_emb,
        limit=limit * 3,
    )
    if not hits:
        return "לא נמצאו תיקים דומים."

    # Keep only the highest-scoring hit per case number.
    # NOTE(review): hits lacking a case number all share one key here —
    # confirm whether search_similar can return such rows.
    best_per_case = {}
    for hit in hits:
        number = hit["case_number"]
        current = best_per_case.get(number)
        if number not in best_per_case or hit["score"] > current["score"]:
            best_per_case[number] = hit

    ranked = list(best_per_case.values())
    ranked.sort(key=lambda hit: hit["score"], reverse=True)

    formatted = [
        {
            "score": round(float(hit["score"]), 4),
            "case_number": hit["case_number"],
            "document": hit["document_title"],
            "relevant_section": hit["content"][:500],
        }
        for hit in ranked[:limit]
    ]
    return json.dumps(formatted, ensure_ascii=False, indent=2)
|
||||
118
mcp-server/src/legal_mcp/tools/workflow.py
Normal file
118
mcp-server/src/legal_mcp/tools/workflow.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""MCP tools for workflow status tracking."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db
|
||||
|
||||
|
||||
async def workflow_status(case_number: str) -> str:
    """Report the full workflow status of a case: documents, processing, drafts.

    Args:
        case_number: Appeal case number.

    Returns:
        A JSON string with the case's number, title and status, a per-document
        summary (title, type, extraction status, chunk count, pages), totals,
        whether ``drafts/decision.md`` exists and its size in bytes, and the
        suggested next steps; or a Hebrew "case not found" message.
    """
    from legal_mcp import config

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    case_id = UUID(case["id"])
    docs = await db.list_documents(case_id)

    # Number of stored chunks per document of this case.
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT document_id, COUNT(*) as count FROM document_chunks WHERE case_id = $1 GROUP BY document_id",
            case_id,
        )
    chunks_by_doc = {str(row["document_id"]): row["count"] for row in rows}

    doc_status = [
        {
            "title": doc["title"],
            "type": doc["doc_type"],
            "extraction": doc["extraction_status"],
            "chunks": chunks_by_doc.get(doc["id"], 0),
            "pages": doc.get("page_count"),
        }
        for doc in docs
    ]

    # Draft status: look for the decision draft inside the case directory.
    draft_path = config.CASES_DIR / case_number / "drafts" / "decision.md"
    has_draft = draft_path.exists()
    if has_draft:
        draft_size = draft_path.stat().st_size
    else:
        draft_size = 0

    report = {
        "case_number": case["case_number"],
        "title": case["title"],
        "status": case["status"],
        "documents": doc_status,
        "total_documents": len(docs),
        "total_chunks": sum(chunks_by_doc.values()),
        "has_draft": has_draft,
        "draft_size_bytes": draft_size,
        "next_steps": _suggest_next_steps(case, docs, has_draft),
    }
    return json.dumps(report, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def _suggest_next_steps(case: dict, docs: list, has_draft: bool) -> list[str]:
|
||||
"""Suggest next steps based on case state."""
|
||||
steps = []
|
||||
doc_types = {d["doc_type"] for d in docs}
|
||||
|
||||
if not docs:
|
||||
steps.append("העלה מסמכים לתיק (כתב ערר, תשובת ועדה)")
|
||||
else:
|
||||
if "appeal" not in doc_types:
|
||||
steps.append("העלה כתב ערר")
|
||||
if "response" not in doc_types:
|
||||
steps.append("העלה תשובת ועדה/משיבים")
|
||||
|
||||
pending = [d for d in docs if d["extraction_status"] == "pending"]
|
||||
if pending:
|
||||
steps.append(f"עיבוד {len(pending)} מסמכים ממתינים")
|
||||
|
||||
if docs and not has_draft:
|
||||
steps.append("התחל ניסוח טיוטת החלטה (/draft-decision)")
|
||||
elif has_draft and case["status"] in ("new", "in_progress"):
|
||||
steps.append("סקור ועדכן את הטיוטה")
|
||||
steps.append("עדכן סטטוס ל-drafted")
|
||||
|
||||
if case["status"] == "drafted":
|
||||
steps.append("סקירה סופית ועדכון סטטוס ל-reviewed")
|
||||
elif case["status"] == "reviewed":
|
||||
steps.append("אישור סופי ועדכון סטטוס ל-final")
|
||||
|
||||
return steps
|
||||
|
||||
|
||||
async def processing_status() -> str:
    """Report overall counts: cases, documents, pending documents and more.

    Returns:
        A JSON string with row counts for cases, documents, documents whose
        extraction status is 'pending', document chunks, style-corpus entries
        and style patterns.
    """
    # Output key -> COUNT query, run in this order on one connection.
    count_queries = {
        "cases": "SELECT COUNT(*) FROM cases",
        "documents": "SELECT COUNT(*) FROM documents",
        "pending_processing": "SELECT COUNT(*) FROM documents WHERE extraction_status = 'pending'",
        "chunks": "SELECT COUNT(*) FROM document_chunks",
        "style_corpus_entries": "SELECT COUNT(*) FROM style_corpus",
        "style_patterns": "SELECT COUNT(*) FROM style_patterns",
    }

    totals = {}
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        for label, sql in count_queries.items():
            totals[label] = await conn.fetchval(sql)

    return json.dumps(totals, ensure_ascii=False, indent=2)
|
||||
Reference in New Issue
Block a user