Initial commit: MCP server + web upload interface

Ezer Mishpati - AI legal decision drafting system with:
- MCP server (FastMCP) with document processing pipeline
- Web upload interface (FastAPI) for file upload and classification
- pgvector-based semantic search
- Hebrew legal document chunking and embedding
2026-03-23 12:33:07 +00:00
commit 6f515dc2cb
33 changed files with 3297 additions and 0 deletions
--- a/mcp-server/src/legal_mcp/tools/drafting.py
+++ b/mcp-server/src/legal_mcp/tools/drafting.py
@@ -0,0 +1,202 @@
+"""MCP tools for decision drafting support."""
+
+from __future__ import annotations
+
+import json
+from uuid import UUID
+
+from legal_mcp.services import db, embeddings
+
+
+# Markdown skeleton (in Hebrew) for a complete decision document.
+#
+# It is filled with ``str.format`` and expects exactly these named fields:
+#   {case_number}, {subject}, {appellants}, {respondents},
+#   {property_address} and {date}.
+# The header block carries the case details, the body holds six lettered
+# sections (factual background, appellants' claims, respondents' claims,
+# discussion and ruling, conclusion, decision), and the footer carries the
+# date and signature line.
+# The square-bracketed lines are NOT format fields: they stay in the output
+# verbatim as prompts marking where the drafted text belongs.
+DECISION_TEMPLATE = """# החלטה
+
+## בפני: דפנה תמיר, יו"ר ועדת הערר מחוז ירושלים
+
+**ערר מספר:** {case_number}
+**נושא:** {subject}
+**העוררים:** {appellants}
+**המשיבים:** {respondents}
+**כתובת הנכס:** {property_address}
+
+---
+
+## א. רקע עובדתי
+
+[תיאור הרקע העובדתי של הערר]
+
+## ב. טענות העוררים
+
+[סיכום טענות העוררים]
+
+## ג. טענות המשיבים
+
+[סיכום טענות המשיבים]
+
+## ד. דיון והכרעה
+
+[ניתוח משפטי]
+
+## ה. מסקנה
+
+[מסקנת הוועדה]
+
+## ו. החלטה
+
+[ההחלטה הסופית]
+
+---
+ניתנה היום, {date}
+דפנה תמיר, יו"ר ועדת הערר
+"""
+
+
+async def get_style_guide() -> str:
+    """שליפת דפוסי הסגנון של דפנה - נוסחאות, ביטויים אופייניים ומבנה."""
+    patterns = await db.get_style_patterns()
+
+    if not patterns:
+        return "לא נמצאו דפוסי סגנון. יש להעלות החלטות קודמות ולהריץ ניתוח סגנון (/style-report)."
+
+    grouped: dict[str, list] = {}
+    for p in patterns:
+        pt = p["pattern_type"]
+        if pt not in grouped:
+            grouped[pt] = []
+        grouped[pt].append({
+            "text": p["pattern_text"],
+            "context": p["context"],
+            "frequency": p["frequency"],
+        })
+
+    type_names = {
+        "opening_formula": "נוסחאות פתיחה",
+        "transition": "ביטויי מעבר",
+        "citation_style": "סגנון ציטוט",
+        "analysis_structure": "מבנה ניתוח",
+        "closing_formula": "נוסחאות סיום",
+        "characteristic_phrase": "ביטויים אופייניים",
+    }
+
+    result = "# מדריך סגנון - דפנה תמיר\n\n"
+    for ptype, items in grouped.items():
+        result += f"## {type_names.get(ptype, ptype)}\n\n"
+        for item in items:
+            result += f"- **{item['text']}** ({item['context']}, תדירות: {item['frequency']})\n"
+        result += "\n"
+
+    return result
+
+
+async def draft_section(
+    case_number: str,
+    section: str,
+    instructions: str = "",
+) -> str:
+    """הרכבת הקשר מלא לניסוח סעיף בהחלטה - כולל עובדות מהמסמכים, תקדימים רלוונטיים ודפוסי סגנון.
+
+    Args:
+        case_number: מספר תיק הערר
+        section: סוג הסעיף (facts, appellant_claims, respondent_claims, legal_analysis, conclusion, ruling)
+        instructions: הנחיות נוספות לניסוח
+    """
+    case = await db.get_case_by_number(case_number)
+    if not case:
+        return f"תיק {case_number} לא נמצא."
+
+    case_id = UUID(case["id"])
+
+    # 1. Get relevant chunks from case documents
+    section_query = {
+        "facts": "רקע עובדתי של התיק",
+        "appellant_claims": "טענות העוררים",
+        "respondent_claims": "טענות המשיבים",
+        "legal_analysis": "ניתוח משפטי ודיון",
+        "conclusion": "מסקנות",
+        "ruling": "החלטה",
+    }.get(section, section)
+
+    query_emb = await embeddings.embed_query(section_query)
+    case_chunks = await db.search_similar(
+        query_embedding=query_emb, limit=10, case_id=case_id
+    )
+
+    # 2. Get similar sections from precedents
+    precedent_chunks = await db.search_similar(
+        query_embedding=query_emb, limit=5, section_type=section
+    )
+    # Filter out chunks from the same case
+    precedent_chunks = [c for c in precedent_chunks if str(c["case_id"]) != case["id"]]
+
+    # 3. Get style patterns
+    style_patterns = await db.get_style_patterns()
+
+    # Build context
+    context = {
+        "case": {
+            "case_number": case["case_number"],
+            "title": case["title"],
+            "appellants": case["appellants"],
+            "respondents": case["respondents"],
+            "subject": case["subject"],
+            "property_address": case["property_address"],
+        },
+        "section": section,
+        "instructions": instructions,
+        "case_documents": [
+            {
+                "document": c["document_title"],
+                "section_type": c["section_type"],
+                "content": c["content"],
+            }
+            for c in case_chunks
+        ],
+        "precedents": [
+            {
+                "case_number": c["case_number"],
+                "document": c["document_title"],
+                "content": c["content"][:500],
+            }
+            for c in precedent_chunks[:3]
+        ],
+        "style_patterns": [
+            {
+                "type": p["pattern_type"],
+                "text": p["pattern_text"],
+            }
+            for p in style_patterns[:15]
+        ],
+    }
+
+    return json.dumps(context, ensure_ascii=False, indent=2)
+
+
+async def get_decision_template(case_number: str) -> str:
+    """קבלת תבנית מבנית להחלטה מלאה עם פרטי התיק.
+
+    Args:
+        case_number: מספר תיק הערר
+    """
+    from datetime import date
+
+    case = await db.get_case_by_number(case_number)
+    if not case:
+        return f"תיק {case_number} לא נמצא."
+
+    template = DECISION_TEMPLATE.format(
+        case_number=case["case_number"],
+        subject=case["subject"],
+        appellants=", ".join(case.get("appellants", [])),
+        respondents=", ".join(case.get("respondents", [])),
+        property_address=case.get("property_address", ""),
+        date=date.today().strftime("%d.%m.%Y"),
+    )
+
+    return template
+
+
+async def analyze_style() -> str:
+    """הרצת ניתוח סגנון על קורפוס ההחלטות של דפנה. מחלץ דפוסי כתיבה ושומר אותם."""
+    from legal_mcp.services.style_analyzer import analyze_corpus
+
+    result = await analyze_corpus()
+    return json.dumps(result, ensure_ascii=False, indent=2)