Initial commit: MCP server + web upload interface
Ezer Mishpati - AI legal decision drafting system with:
- MCP server (FastMCP) with document processing pipeline
- Web upload interface (FastAPI) for file upload and classification
- pgvector-based semantic search
- Hebrew legal document chunking and embedding
.claude/commands/case-status.md (new file, 7 lines)
@@ -0,0 +1,7 @@
Display the full status of an appeal case.

Ask the user for the case number, then use workflow_status to display:
- Case details
- Document processing status
- Draft status
- Next steps
.claude/commands/draft-decision.md (new file, 24 lines)
@@ -0,0 +1,24 @@
Draft a full decision for an appeal case - section by section.

Ask the user for the case number.

Drafting process:

1. Fetch the case details with case_get
2. Fetch the style guide with get_style_guide
3. Fetch the decision template with get_decision_template

For each section:
4. Use draft_section to get the full context (case documents + precedents + style)
5. Draft the section in Dafna's style based on that context
6. Show it to the user and ask for approval/edits before moving on to the next section

Section order:
- A. Factual background (facts)
- B. Appellants' claims (appellant_claims)
- C. Respondents' claims (respondent_claims)
- D. Discussion and determination (legal_analysis)
- E. Conclusion (conclusion)
- F. Ruling (ruling)

When done, save the complete draft to drafts/decision.md in the case directory.
.claude/commands/new-case.md (new file, 13 lines)
@@ -0,0 +1,13 @@
Create a new appeal case.

Ask the user for the following details, then create the case with the case_create tool:

1. Case number (e.g., 123-24)
2. Short title
3. Appellant names
4. Respondent names
5. Appeal subject
6. Property address (if relevant)
7. Hearing date (if known)

After the case is created, suggest uploading documents with /upload-doc.
.claude/commands/search.md (new file, 12 lines)
@@ -0,0 +1,12 @@
Semantic search across prior decisions and documents.

Ask the user what they are looking for (in Hebrew), then use search_decisions.

Present the results in an organized format:
- Relevance score
- Case number
- Document name
- Section type
- Relevant content

If the user wants to search within a specific case, use search_case_documents.
.claude/commands/style-report.md (new file, 13 lines)
@@ -0,0 +1,13 @@
Run style analysis on Dafna's decision corpus.

Use analyze_style to extract writing patterns from the uploaded decisions.

Display the patterns found:
- Opening formulas
- Transition phrases
- Citation style
- Analysis structure
- Closing formulas
- Characteristic phrases

If there are not enough decisions in the corpus, suggest uploading more with /upload-training.
.claude/commands/upload-doc.md (new file, 16 lines)
@@ -0,0 +1,16 @@
Upload a document to an appeal case.

Ask the user:
1. Case number
2. Path to the file (PDF, DOCX, RTF, or TXT)
3. Document type:
- appeal = statement of appeal
- response = committee/respondents' reply
- decision = decision
- reference = reference document
- exhibit = exhibit
4. Document name (optional)

Use the document_upload tool to upload it. The document is processed automatically (text extraction, chunking, and embedding generation).

Show the user the processing results (number of chunks, pages).
.claude/commands/upload-training.md (new file, 20 lines)
@@ -0,0 +1,20 @@
Upload one of Dafna's prior decisions to the style corpus.

Ask the user:
1. Path to the decision file
2. Decision number (if known)
3. Decision date (YYYY-MM-DD)
4. Categories (more than one may be selected; the Hebrew values below are the literal category strings the tool expects):
- בנייה (construction)
- שימוש חורג (non-conforming use)
- תכנית (plan)
- היתר (permit)
- הקלה (variance)
- חלוקה (subdivision)
- תמ"א 38 (TAMA 38)
- היטל השבחה (betterment levy)
- פיצויים לפי סעיף 197 (compensation under section 197)

Use the document_upload_training tool with subject_categories as a list.

After uploading several decisions, suggest running /style-report to analyze the style patterns.
.dockerignore (new file, 6 lines)
@@ -0,0 +1,6 @@
data/
.claude/
mcp-server/.venv/
**/__pycache__/
*.pyc
.git/
.gitignore (new file, 8 lines, vendored)
@@ -0,0 +1,8 @@
data/uploads/
data/cases/
mcp-server/.venv/
__pycache__/
*.pyc
.env
data/training/
*.egg-info/
.mcp.json (new file, 14 lines)
@@ -0,0 +1,14 @@
{
  "mcpServers": {
    "legal-ai": {
      "type": "stdio",
      "command": "/home/chaim/legal-ai/mcp-server/.venv/bin/python",
      "args": ["-m", "legal_mcp.server"],
      "cwd": "/home/chaim/legal-ai/mcp-server",
      "env": {
        "DOTENV_PATH": "/home/chaim/.env",
        "DATA_DIR": "/home/chaim/legal-ai/data"
      }
    }
  }
}
CLAUDE.md (new file, 48 lines)
@@ -0,0 +1,48 @@
# Ezer Mishpati (עוזר משפטי)

An AI system for assisting with drafting legal decisions in the style of Dafna Tamir, chair of the Jerusalem District Appeals Committee.

## Available MCP Tools

### Case Management
- `case_create` - create a new appeal case
- `case_list` - list cases (optional filtering by status)
- `case_get` - full case details, including documents
- `case_update` - update case details and status

### Documents
- `document_upload` - upload and process a document (text extraction → chunks → embeddings)
- `document_upload_training` - upload one of Dafna's prior decisions to the corpus
- `document_get_text` - get extracted text
- `document_list` - list documents in a case

### Search
- `search_decisions` - semantic search across decisions and documents
- `search_case_documents` - search within a specific case
- `find_similar_cases` - find similar cases

### Drafting
- `get_style_guide` - Dafna's style patterns
- `draft_section` - assemble the context for drafting a section (facts + precedents + style)
- `get_decision_template` - structural template for a decision
- `analyze_style` - style analysis over the corpus

### Workflow
- `workflow_status` - full status for a case
- `processing_status` - overall system status

## Typical Workflow

1. `/new-case` → create a new case
2. `/upload-doc` → upload the statement of appeal and the committee's reply
3. Search for similar cases
4. `/draft-decision` → draft section by section
5. Edit and refine with Claude
6. Update status → final

## Drafting Guidelines

- All decisions are in Hebrew
- Preserve Dafna's style (use `get_style_guide` before drafting)
- Cite precedents from the corpus
- Structure: background → appellants' claims → respondents' claims → discussion → conclusion → ruling
Dockerfile (new file, 26 lines)
@@ -0,0 +1,26 @@
FROM python:3.12-slim

WORKDIR /app

# System deps for PyMuPDF and document processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc libmupdf-dev libfreetype6-dev libharfbuzz-dev libjpeg62-turbo-dev \
    libopenjp2-7-dev && rm -rf /var/lib/apt/lists/*

# Copy MCP server source (for importing services)
COPY mcp-server/pyproject.toml /app/mcp-server/pyproject.toml
COPY mcp-server/src/ /app/mcp-server/src/

# Install MCP server dependencies + web deps
RUN pip install --no-cache-dir /app/mcp-server && \
    pip install --no-cache-dir fastapi uvicorn python-multipart

# Copy web app
COPY web/ /app/web/

ENV PYTHONPATH=/app/mcp-server/src
ENV DOTENV_PATH=/home/chaim/.env

EXPOSE 8080

CMD ["uvicorn", "web.app:app", "--host", "0.0.0.0", "--port", "8080"]
mcp-server/pyproject.toml (new file, 27 lines)
@@ -0,0 +1,27 @@
[project]
name = "legal-mcp"
version = "0.1.0"
description = "MCP server for AI-assisted legal decision drafting"
requires-python = ">=3.10"
dependencies = [
    "mcp[cli]>=1.0.0",
    "asyncpg>=0.29.0",
    "pgvector>=0.3.0",
    "voyageai>=0.3.0",
    "anthropic>=0.40.0",
    "python-dotenv>=1.0.0",
    "pydantic>=2.0.0",
    "pymupdf>=1.25.0",
    "python-docx>=1.1.0",
    "striprtf>=0.0.26",
    "redis>=5.0.0",
    "rq>=1.16.0",
    "pillow>=10.0.0",
]

[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
where = ["src"]
mcp-server/src/legal_mcp/__init__.py (new file, empty)
mcp-server/src/legal_mcp/__main__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
"""Allow running with: python -m legal_mcp"""
from legal_mcp.server import main

main()
mcp-server/src/legal_mcp/config.py (new file, 40 lines)
@@ -0,0 +1,40 @@
"""Configuration loaded from central .env file."""

import os
from pathlib import Path

from dotenv import load_dotenv

# Load from central .env or override path
dotenv_path = os.environ.get("DOTENV_PATH", str(Path.home() / ".env"))
load_dotenv(dotenv_path)

# PostgreSQL
POSTGRES_URL = os.environ.get(
    "POSTGRES_URL",
    f"postgres://{os.environ.get('POSTGRES_USER', 'legal_ai')}:"
    f"{os.environ.get('POSTGRES_PASSWORD', '')}@"
    f"{os.environ.get('POSTGRES_HOST', '127.0.0.1')}:"
    f"{os.environ.get('POSTGRES_PORT', '5433')}/"
    f"{os.environ.get('POSTGRES_DB', 'legal_ai')}",
)

# Redis
REDIS_URL = os.environ.get("REDIS_URL", "redis://127.0.0.1:6380/0")

# Voyage AI
VOYAGE_API_KEY = os.environ.get("VOYAGE_API_KEY", "")
VOYAGE_MODEL = "voyage-3-large"
VOYAGE_DIMENSIONS = 1024

# Anthropic (for Claude Vision OCR)
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")

# Data directory
DATA_DIR = Path(os.environ.get("DATA_DIR", str(Path.home() / "legal-ai" / "data")))
CASES_DIR = DATA_DIR / "cases"
TRAINING_DIR = DATA_DIR / "training"

# Chunking parameters
CHUNK_SIZE_TOKENS = 600
CHUNK_OVERLAP_TOKENS = 100
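Since everything above resolves at import time from a single dotenv file, a quick way to confirm the central `.env` was picked up before launching the server is a small check like the following (illustrative only, not part of the commit):

```python
# Hypothetical smoke test: import the config and inspect the values it resolved.
# Run from mcp-server/ with the virtualenv active.
from legal_mcp import config

assert config.VOYAGE_API_KEY, "VOYAGE_API_KEY missing from the central .env"
assert config.ANTHROPIC_API_KEY, "ANTHROPIC_API_KEY missing from the central .env"
print(config.POSTGRES_URL)  # assembled from POSTGRES_* vars unless POSTGRES_URL is set
print(config.DATA_DIR)      # ~/legal-ai/data unless DATA_DIR overrides it
```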
mcp-server/src/legal_mcp/models.py (new file, 97 lines)
@@ -0,0 +1,97 @@
"""Pydantic models for cases, documents, and related entities."""

from __future__ import annotations

import enum
from datetime import date, datetime
from uuid import UUID

from pydantic import BaseModel, Field


class CaseStatus(str, enum.Enum):
    NEW = "new"
    IN_PROGRESS = "in_progress"
    DRAFTED = "drafted"
    REVIEWED = "reviewed"
    FINAL = "final"


class DocType(str, enum.Enum):
    APPEAL = "appeal"        # statement of appeal (כתב ערר)
    RESPONSE = "response"    # reply (תשובה)
    DECISION = "decision"    # decision (החלטה)
    REFERENCE = "reference"  # reference document (מסמך עזר)
    EXHIBIT = "exhibit"      # exhibit (נספח)


class SectionType(str, enum.Enum):
    INTRO = "intro"
    FACTS = "facts"
    APPELLANT_CLAIMS = "appellant_claims"
    RESPONDENT_CLAIMS = "respondent_claims"
    LEGAL_ANALYSIS = "legal_analysis"
    CONCLUSION = "conclusion"
    RULING = "ruling"
    OTHER = "other"


class ExtractionStatus(str, enum.Enum):
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


class CaseCreate(BaseModel):
    case_number: str = Field(description="מספר תיק הערר (לדוגמה: 123-24)")
    title: str = Field(description="כותרת קצרה של הערר")
    appellants: list[str] = Field(default_factory=list, description="שמות העוררים")
    respondents: list[str] = Field(default_factory=list, description="שמות המשיבים")
    subject: str = Field(default="", description="נושא הערר")
    property_address: str = Field(default="", description="כתובת הנכס")
    permit_number: str = Field(default="", description="מספר היתר")
    committee_type: str = Field(default="ועדה מקומית", description="סוג הוועדה")
    hearing_date: date | None = Field(default=None, description="תאריך דיון")
    notes: str = Field(default="", description="הערות")


class CaseInfo(BaseModel):
    id: UUID
    case_number: str
    title: str
    appellants: list[str]
    respondents: list[str]
    subject: str
    property_address: str
    permit_number: str
    committee_type: str
    status: CaseStatus
    hearing_date: date | None
    decision_date: date | None
    tags: list[str]
    notes: str
    created_at: datetime
    updated_at: datetime


class DocumentInfo(BaseModel):
    id: UUID
    case_id: UUID
    doc_type: DocType
    title: str
    file_path: str
    extraction_status: ExtractionStatus
    page_count: int | None
    created_at: datetime


class SearchResult(BaseModel):
    chunk_content: str
    score: float
    case_number: str
    document_title: str
    section_type: str
    page_number: int | None
    document_id: UUID
    case_id: UUID
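For orientation, this is how the request model round-trips under pydantic v2 (a sketch; the case data below is invented):

```python
# Illustrative only: build a CaseCreate payload and serialize it.
from datetime import date
from legal_mcp.models import CaseCreate, CaseStatus

case = CaseCreate(
    case_number="123-24",
    title="ערר על סירוב היתר בנייה",
    appellants=["פלוני"],
    respondents=["הוועדה המקומית ירושלים"],
    hearing_date=date(2024, 6, 1),
)
print(case.model_dump_json())  # JSON with the remaining defaults filled in
print(CaseStatus.NEW.value)    # "new"
```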
mcp-server/src/legal_mcp/server.py (new file, 219 lines)
@@ -0,0 +1,219 @@
"""Ezer Mishpati - MCP Server entry point.

Run with: python -m legal_mcp.server
"""

from __future__ import annotations

import logging
import sys
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager

from mcp.server.fastmcp import FastMCP

# Configure logging to stderr (stdout is reserved for JSON-RPC)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    stream=sys.stderr,
)
logger = logging.getLogger("legal_mcp")


@asynccontextmanager
async def lifespan(server: FastMCP) -> AsyncIterator[None]:
    """Initialize DB schema on startup, close pool on shutdown."""
    from legal_mcp.services.db import close_pool, init_schema

    logger.info("Initializing database schema...")
    await init_schema()
    logger.info("Ezer Mishpati MCP server ready")
    try:
        yield
    finally:
        await close_pool()
        logger.info("Ezer Mishpati MCP server stopped")


# Create MCP server
mcp = FastMCP(
    "Ezer Mishpati - עוזר משפטי",
    instructions="מערכת AI לסיוע בניסוח החלטות משפטיות בסגנון דפנה תמיר",
    lifespan=lifespan,
)

# ── Import and register tools ───────────────────────────────────────

from legal_mcp.tools import cases, documents, search, drafting, workflow  # noqa: E402


# Case management
@mcp.tool()
async def case_create(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: str = "",
    notes: str = "",
) -> str:
    """Create a new appeal case."""
    return await cases.case_create(
        case_number, title, appellants, respondents,
        subject, property_address, permit_number, committee_type,
        hearing_date, notes,
    )


@mcp.tool()
async def case_list(status: str = "", limit: int = 50) -> str:
    """List appeal cases, optionally filtered by status (new/in_progress/drafted/reviewed/final)."""
    return await cases.case_list(status, limit)


@mcp.tool()
async def case_get(case_number: str) -> str:
    """Full case details, including the document list."""
    return await cases.case_get(case_number)


@mcp.tool()
async def case_update(
    case_number: str,
    status: str = "",
    title: str = "",
    subject: str = "",
    notes: str = "",
    hearing_date: str = "",
    decision_date: str = "",
    tags: list[str] | None = None,
) -> str:
    """Update case details."""
    return await cases.case_update(
        case_number, status, title, subject, notes,
        hearing_date, decision_date, tags,
    )


# Documents
@mcp.tool()
async def document_upload(
    case_number: str,
    file_path: str,
    doc_type: str = "appeal",
    title: str = "",
) -> str:
    """Upload and process a document for an appeal case (PDF/DOCX/RTF/TXT). Extracts text and creates embeddings."""
    return await documents.document_upload(case_number, file_path, doc_type, title)


@mcp.tool()
async def document_upload_training(
    file_path: str,
    decision_number: str = "",
    decision_date: str = "",
    subject_categories: list[str] | None = None,
    title: str = "",
) -> str:
    """Upload one of Dafna's prior decisions to the style corpus. Categories: בנייה, שימוש חורג, תכנית, היתר, הקלה, חלוקה, תמ"א 38, היטל השבחה, פיצויים 197."""
    return await documents.document_upload_training(
        file_path, decision_number, decision_date, subject_categories, title,
    )


@mcp.tool()
async def document_get_text(case_number: str, doc_title: str = "") -> str:
    """Get the full text of a document in a case."""
    return await documents.document_get_text(case_number, doc_title)


@mcp.tool()
async def document_list(case_number: str) -> str:
    """List the documents in a case."""
    return await documents.document_list(case_number)


# Search
@mcp.tool()
async def search_decisions(
    query: str,
    limit: int = 10,
    section_type: str = "",
) -> str:
    """Semantic search across prior decisions and documents."""
    return await search.search_decisions(query, limit, section_type)


@mcp.tool()
async def search_case_documents(
    case_number: str,
    query: str,
    limit: int = 10,
) -> str:
    """Semantic search within the documents of a specific case."""
    return await search.search_case_documents(case_number, query, limit)


@mcp.tool()
async def find_similar_cases(
    description: str,
    limit: int = 5,
) -> str:
    """Find similar cases based on a description."""
    return await search.find_similar_cases(description, limit)


# Drafting
@mcp.tool()
async def get_style_guide() -> str:
    """Fetch Dafna's style patterns - formulas, characteristic phrases, and structure."""
    return await drafting.get_style_guide()


@mcp.tool()
async def draft_section(
    case_number: str,
    section: str,
    instructions: str = "",
) -> str:
    """Assemble the full context for drafting a section (facts + precedents + style)."""
    return await drafting.draft_section(case_number, section, instructions)


@mcp.tool()
async def get_decision_template(case_number: str) -> str:
    """Structural template for a full decision, filled in with the case details."""
    return await drafting.get_decision_template(case_number)


@mcp.tool()
async def analyze_style() -> str:
    """Run style analysis over Dafna's decision corpus. Extracts and stores writing patterns."""
    return await drafting.analyze_style()


# Workflow
@mcp.tool()
async def workflow_status(case_number: str) -> str:
    """Full workflow status for a case."""
    return await workflow.workflow_status(case_number)


@mcp.tool()
async def processing_status() -> str:
    """Overall status - number of cases, documents, and chunks."""
    return await workflow.processing_status()


def main():
    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()
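Since the server speaks JSON-RPC over stdio, a smoke test can drive it with the MCP Python SDK's stdio client. This sketch assumes the standard `mcp` client API and is not part of the commit:

```python
# Hypothetical client-side smoke test: spawn the server and list its tools.
import asyncio
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def main():
    params = StdioServerParameters(command="python", args=["-m", "legal_mcp.server"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([t.name for t in tools.tools])  # case_create, case_list, ...

asyncio.run(main())
```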
mcp-server/src/legal_mcp/services/__init__.py (new file, empty)
mcp-server/src/legal_mcp/services/chunker.py (new file, 130 lines)
@@ -0,0 +1,130 @@
"""Legal document chunker - splits text into sections and chunks for RAG."""

from __future__ import annotations

import re
from dataclasses import dataclass

from legal_mcp import config

# Hebrew legal section headers
SECTION_PATTERNS = [
    (r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע", "facts"),
    (r"טענות\s*העוררי[םן]|טענות\s*המערערי[םן]|עיקר\s*טענות\s*העוררי[םן]", "appellant_claims"),
    (r"טענות\s*המשיבי[םן]|תשובת\s*המשיבי[םן]|עיקר\s*טענות\s*המשיבי[םן]", "respondent_claims"),
    (r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית", "legal_analysis"),
    (r"מסקנ[הות]|סיכום", "conclusion"),
    (r"החלטה|לפיכך\s*אני\s*מחליט|התוצאה", "ruling"),
    (r"מבוא|פתיחה|לפניי", "intro"),
]


@dataclass
class Chunk:
    content: str
    section_type: str = "other"
    page_number: int | None = None
    chunk_index: int = 0


def chunk_document(
    text: str,
    chunk_size: int = config.CHUNK_SIZE_TOKENS,
    overlap: int = config.CHUNK_OVERLAP_TOKENS,
) -> list[Chunk]:
    """Split a legal document into chunks, respecting section boundaries."""
    if not text.strip():
        return []

    sections = _split_into_sections(text)
    chunks: list[Chunk] = []
    idx = 0

    for section_type, section_text in sections:
        section_chunks = _split_section(section_text, chunk_size, overlap)
        for chunk_text in section_chunks:
            chunks.append(Chunk(
                content=chunk_text,
                section_type=section_type,
                chunk_index=idx,
            ))
            idx += 1

    return chunks


def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Split text into (section_type, text) pairs based on Hebrew headers."""
    # Find all section headers and their positions
    markers: list[tuple[int, str]] = []

    for pattern, section_type in SECTION_PATTERNS:
        for match in re.finditer(pattern, text):
            markers.append((match.start(), section_type))

    if not markers:
        # No sections found - treat as a single block
        return [("other", text)]

    markers.sort(key=lambda x: x[0])

    sections: list[tuple[str, str]] = []

    # Text before the first section
    if markers[0][0] > 0:
        intro_text = text[: markers[0][0]].strip()
        if intro_text:
            sections.append(("intro", intro_text))

    # Each section
    for i, (pos, section_type) in enumerate(markers):
        end = markers[i + 1][0] if i + 1 < len(markers) else len(text)
        section_text = text[pos:end].strip()
        if section_text:
            sections.append((section_type, section_text))

    return sections


def _split_section(text: str, chunk_size: int, overlap: int) -> list[str]:
    """Split a section into overlapping chunks by paragraphs.

    Uses approximate token counting (Hebrew ~2 chars per token).
    """
    if not text.strip():
        return []

    paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    chunks: list[str] = []
    current: list[str] = []
    current_tokens = 0

    for para in paragraphs:
        para_tokens = _estimate_tokens(para)

        if current_tokens + para_tokens > chunk_size and current:
            chunks.append("\n".join(current))
            # Keep overlap
            overlap_paras: list[str] = []
            overlap_tokens = 0
            for p in reversed(current):
                pt = _estimate_tokens(p)
                if overlap_tokens + pt > overlap:
                    break
                overlap_paras.insert(0, p)
                overlap_tokens += pt
            current = overlap_paras
            current_tokens = overlap_tokens

        current.append(para)
        current_tokens += para_tokens

    if current:
        chunks.append("\n".join(current))

    return chunks


def _estimate_tokens(text: str) -> int:
    """Rough token estimate for Hebrew text (~2 chars per token)."""
    return max(1, len(text) // 2)
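A small sanity check of the section detection (hypothetical input; both headers below match entries in SECTION_PATTERNS):

```python
from legal_mcp.services.chunker import chunk_document

text = (
    "רקע עובדתי\n"
    "העוררים הגישו ערר על החלטת הוועדה המקומית.\n"
    "\n"
    "דיון והכרעה\n"
    "לאחר שבחנו את טענות הצדדים, מצאנו כי דין הערר להתקבל.\n"
)
for c in chunk_document(text):
    print(c.chunk_index, c.section_type)
# Expected: chunk 0 tagged "facts", chunk 1 tagged "legal_analysis".
```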
mcp-server/src/legal_mcp/services/db.py (new file, 440 lines)
@@ -0,0 +1,440 @@
"""Database service - asyncpg connection pool and queries."""

from __future__ import annotations

import json
import logging
from datetime import date
from uuid import UUID, uuid4

import asyncpg
from pgvector.asyncpg import register_vector

from legal_mcp import config

logger = logging.getLogger(__name__)

_pool: asyncpg.Pool | None = None


async def get_pool() -> asyncpg.Pool:
    global _pool
    if _pool is None:
        # First, ensure pgvector extension exists (before registering type codec)
        conn = await asyncpg.connect(config.POSTGRES_URL)
        await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
        await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
        await conn.close()

        _pool = await asyncpg.create_pool(
            config.POSTGRES_URL,
            min_size=2,
            max_size=10,
            init=_init_connection,
        )
    return _pool


async def _init_connection(conn: asyncpg.Connection) -> None:
    await register_vector(conn)


async def close_pool() -> None:
    global _pool
    if _pool:
        await _pool.close()
        _pool = None


# ── Schema ──────────────────────────────────────────────────────────

SCHEMA_SQL = """

CREATE TABLE IF NOT EXISTS cases (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_number TEXT UNIQUE NOT NULL,
    title TEXT NOT NULL,
    appellants JSONB DEFAULT '[]',
    respondents JSONB DEFAULT '[]',
    subject TEXT DEFAULT '',
    property_address TEXT DEFAULT '',
    permit_number TEXT DEFAULT '',
    committee_type TEXT DEFAULT 'ועדה מקומית',
    status TEXT DEFAULT 'new',
    hearing_date DATE,
    decision_date DATE,
    tags JSONB DEFAULT '[]',
    notes TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS documents (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    doc_type TEXT NOT NULL,
    title TEXT NOT NULL,
    file_path TEXT NOT NULL,
    extracted_text TEXT DEFAULT '',
    extraction_status TEXT DEFAULT 'pending',
    page_count INTEGER,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS document_chunks (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id UUID REFERENCES documents(id) ON DELETE CASCADE,
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    chunk_index INTEGER NOT NULL,
    content TEXT NOT NULL,
    section_type TEXT DEFAULT 'other',
    embedding vector(1024),
    page_number INTEGER,
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS style_corpus (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
    decision_number TEXT,
    decision_date DATE,
    subject_categories JSONB DEFAULT '[]',
    full_text TEXT NOT NULL,
    summary TEXT DEFAULT '',
    outcome TEXT DEFAULT '',
    key_principles JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS style_patterns (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    pattern_type TEXT NOT NULL,
    pattern_text TEXT NOT NULL,
    frequency INTEGER DEFAULT 1,
    context TEXT DEFAULT '',
    examples JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_chunks_embedding
    ON document_chunks USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

CREATE INDEX IF NOT EXISTS idx_chunks_case ON document_chunks(case_id);
CREATE INDEX IF NOT EXISTS idx_chunks_doc ON document_chunks(document_id);
CREATE INDEX IF NOT EXISTS idx_docs_case ON documents(case_id);
CREATE INDEX IF NOT EXISTS idx_cases_status ON cases(status);
CREATE INDEX IF NOT EXISTS idx_cases_number ON cases(case_number);
"""


async def init_schema() -> None:
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(SCHEMA_SQL)
    logger.info("Database schema initialized")


# ── Case CRUD ───────────────────────────────────────────────────────

async def create_case(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: date | None = None,
    notes: str = "",
) -> dict:
    pool = await get_pool()
    case_id = uuid4()
    async with pool.acquire() as conn:
        await conn.execute(
            """INSERT INTO cases (id, case_number, title, appellants, respondents,
                                  subject, property_address, permit_number, committee_type,
                                  hearing_date, notes)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
            case_id, case_number, title,
            json.dumps(appellants or []),
            json.dumps(respondents or []),
            subject, property_address, permit_number, committee_type,
            hearing_date, notes,
        )
    return await get_case(case_id)


async def get_case(case_id: UUID) -> dict | None:
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("SELECT * FROM cases WHERE id = $1", case_id)
    if row is None:
        return None
    return _row_to_case(row)


async def get_case_by_number(case_number: str) -> dict | None:
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "SELECT * FROM cases WHERE case_number = $1", case_number
        )
    if row is None:
        return None
    return _row_to_case(row)


async def list_cases(status: str | None = None, limit: int = 50) -> list[dict]:
    pool = await get_pool()
    async with pool.acquire() as conn:
        if status:
            rows = await conn.fetch(
                "SELECT * FROM cases WHERE status = $1 ORDER BY updated_at DESC LIMIT $2",
                status, limit,
            )
        else:
            rows = await conn.fetch(
                "SELECT * FROM cases ORDER BY updated_at DESC LIMIT $1", limit
            )
    return [_row_to_case(r) for r in rows]


async def update_case(case_id: UUID, **fields) -> dict | None:
    if not fields:
        return await get_case(case_id)
    pool = await get_pool()
    set_clauses = []
    values = []
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key in ("appellants", "respondents", "tags"):
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    set_clauses.append("updated_at = now()")
    sql = f"UPDATE cases SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, case_id, *values)
    return await get_case(case_id)


def _row_to_case(row: asyncpg.Record) -> dict:
    d = dict(row)
    for field in ("appellants", "respondents", "tags"):
        if isinstance(d.get(field), str):
            d[field] = json.loads(d[field])
    d["id"] = str(d["id"])
    return d


# ── Document CRUD ───────────────────────────────────────────────────

async def create_document(
    case_id: UUID,
    doc_type: str,
    title: str,
    file_path: str,
    page_count: int | None = None,
) -> dict:
    pool = await get_pool()
    doc_id = uuid4()
    async with pool.acquire() as conn:
        await conn.execute(
            """INSERT INTO documents (id, case_id, doc_type, title, file_path, page_count)
               VALUES ($1, $2, $3, $4, $5, $6)""",
            doc_id, case_id, doc_type, title, file_path, page_count,
        )
        row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id)
    return _row_to_doc(row)


async def update_document(doc_id: UUID, **fields) -> None:
    if not fields:
        return
    pool = await get_pool()
    set_clauses = []
    values = []
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key == "metadata":
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    sql = f"UPDATE documents SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, doc_id, *values)


async def get_document(doc_id: UUID) -> dict | None:
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id)
    return _row_to_doc(row) if row else None


async def list_documents(case_id: UUID) -> list[dict]:
    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT * FROM documents WHERE case_id = $1 ORDER BY created_at", case_id
        )
    return [_row_to_doc(r) for r in rows]


async def get_document_text(doc_id: UUID) -> str:
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "SELECT extracted_text FROM documents WHERE id = $1", doc_id
        )
    return row["extracted_text"] if row else ""


def _row_to_doc(row: asyncpg.Record) -> dict:
    d = dict(row)
    d["id"] = str(d["id"])
    d["case_id"] = str(d["case_id"])
    if isinstance(d.get("metadata"), str):
        d["metadata"] = json.loads(d["metadata"])
    return d


# ── Chunks & Vectors ───────────────────────────────────────────────

async def store_chunks(
    document_id: UUID,
    case_id: UUID | None,
    chunks: list[dict],
) -> int:
    """Store document chunks with embeddings. Each chunk dict has:
    content, section_type, embedding (list[float]), page_number, chunk_index
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        # Delete existing chunks for this document
        await conn.execute(
            "DELETE FROM document_chunks WHERE document_id = $1", document_id
        )
        for chunk in chunks:
            await conn.execute(
                """INSERT INTO document_chunks
                       (document_id, case_id, chunk_index, content, section_type, embedding, page_number)
                   VALUES ($1, $2, $3, $4, $5, $6, $7)""",
                document_id, case_id,
                chunk["chunk_index"],
                chunk["content"],
                chunk.get("section_type", "other"),
                chunk["embedding"],
                chunk.get("page_number"),
            )
    return len(chunks)


async def search_similar(
    query_embedding: list[float],
    limit: int = 10,
    case_id: UUID | None = None,
    section_type: str | None = None,
) -> list[dict]:
    """Cosine similarity search on document chunks."""
    pool = await get_pool()
    conditions = []
    params: list = [query_embedding, limit]
    param_idx = 3

    if case_id:
        conditions.append(f"dc.case_id = ${param_idx}")
        params.append(case_id)
        param_idx += 1
    if section_type:
        conditions.append(f"dc.section_type = ${param_idx}")
        params.append(section_type)
        param_idx += 1

    where = f"WHERE {' AND '.join(conditions)}" if conditions else ""

    sql = f"""
        SELECT dc.content, dc.section_type, dc.page_number,
               dc.document_id, dc.case_id,
               d.title AS document_title,
               c.case_number,
               1 - (dc.embedding <=> $1) AS score
        FROM document_chunks dc
        JOIN documents d ON d.id = dc.document_id
        JOIN cases c ON c.id = dc.case_id
        {where}
        ORDER BY dc.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        rows = await conn.fetch(sql, *params)
    return [dict(r) for r in rows]


# ── Style corpus ────────────────────────────────────────────────────

async def add_to_style_corpus(
    document_id: UUID | None,
    decision_number: str,
    decision_date: date | None,
    subject_categories: list[str],
    full_text: str,
    summary: str = "",
    outcome: str = "",
    key_principles: list[str] | None = None,
) -> UUID:
    pool = await get_pool()
    corpus_id = uuid4()
    async with pool.acquire() as conn:
        await conn.execute(
            """INSERT INTO style_corpus
                   (id, document_id, decision_number, decision_date,
                    subject_categories, full_text, summary, outcome, key_principles)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
            corpus_id, document_id, decision_number, decision_date,
            json.dumps(subject_categories), full_text, summary, outcome,
            json.dumps(key_principles or []),
        )
    return corpus_id


async def get_style_patterns(pattern_type: str | None = None) -> list[dict]:
    pool = await get_pool()
    async with pool.acquire() as conn:
        if pattern_type:
            rows = await conn.fetch(
                "SELECT * FROM style_patterns WHERE pattern_type = $1 ORDER BY frequency DESC",
                pattern_type,
            )
        else:
            rows = await conn.fetch(
                "SELECT * FROM style_patterns ORDER BY pattern_type, frequency DESC"
            )
    return [dict(r) for r in rows]


async def upsert_style_pattern(
    pattern_type: str,
    pattern_text: str,
    context: str = "",
    examples: list[str] | None = None,
) -> None:
    pool = await get_pool()
    async with pool.acquire() as conn:
        existing = await conn.fetchrow(
            "SELECT id, frequency FROM style_patterns WHERE pattern_type = $1 AND pattern_text = $2",
            pattern_type, pattern_text,
        )
        if existing:
            await conn.execute(
                "UPDATE style_patterns SET frequency = frequency + 1 WHERE id = $1",
                existing["id"],
            )
        else:
            await conn.execute(
                """INSERT INTO style_patterns (pattern_type, pattern_text, context, examples)
                   VALUES ($1, $2, $3, $4)""",
                pattern_type, pattern_text, context,
                json.dumps(examples or []),
            )
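To tie the pieces together, a hypothetical end-to-end query against this schema might look like the following (query text invented; assumes the registered pgvector codec accepts plain float lists):

```python
import asyncio
from legal_mcp.services import db, embeddings

async def demo():
    await db.init_schema()
    vec = await embeddings.embed_query("הקלה בקווי בניין")  # 1024-dim query vector
    hits = await db.search_similar(vec, limit=5, section_type="legal_analysis")
    for h in hits:
        print(f"{h['score']:.3f}", h["case_number"], h["document_title"])
    await db.close_pool()

asyncio.run(demo())
```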
mcp-server/src/legal_mcp/services/embeddings.py (new file, 55 lines)
@@ -0,0 +1,55 @@
"""Embedding service using Voyage AI API."""

from __future__ import annotations

import logging

import voyageai

from legal_mcp import config

logger = logging.getLogger(__name__)

_client: voyageai.Client | None = None


def _get_client() -> voyageai.Client:
    global _client
    if _client is None:
        _client = voyageai.Client(api_key=config.VOYAGE_API_KEY)
    return _client


async def embed_texts(texts: list[str], input_type: str = "document") -> list[list[float]]:
    """Embed a batch of texts using Voyage AI.

    Args:
        texts: List of texts to embed (max 128 per call).
        input_type: "document" for indexing, "query" for search queries.

    Returns:
        List of embedding vectors (1024 dimensions each).
    """
    if not texts:
        return []

    client = _get_client()
    all_embeddings = []

    # Voyage AI supports up to 128 texts per batch
    for i in range(0, len(texts), 128):
        batch = texts[i : i + 128]
        result = client.embed(
            batch,
            model=config.VOYAGE_MODEL,
            input_type=input_type,
        )
        all_embeddings.extend(result.embeddings)

    return all_embeddings


async def embed_query(query: str) -> list[float]:
    """Embed a single search query."""
    results = await embed_texts([query], input_type="query")
    return results[0]
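One caveat: `embed_texts` is declared `async`, but `voyageai.Client.embed` is a synchronous HTTP call, so the event loop blocks while each batch is in flight. A non-blocking variant could offload the call to a thread, e.g.:

```python
# Sketch: same batching logic, but the blocking Voyage call runs in a worker thread.
import asyncio

async def embed_texts_nonblocking(texts: list[str], input_type: str = "document"):
    client = _get_client()
    all_embeddings: list[list[float]] = []
    for i in range(0, len(texts), 128):
        batch = texts[i : i + 128]
        result = await asyncio.to_thread(
            client.embed, batch, model=config.VOYAGE_MODEL, input_type=input_type
        )
        all_embeddings.extend(result.embeddings)
    return all_embeddings
```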
mcp-server/src/legal_mcp/services/extractor.py (new file, 126 lines)
@@ -0,0 +1,126 @@
"""Text extraction from PDF, DOCX, and RTF files.

Primary PDF extraction: PyMuPDF direct text extraction (for born-digital PDFs).
Fallback: Claude Vision OCR (for scanned pages).
"""

from __future__ import annotations

import base64
import logging
from pathlib import Path

import anthropic
import fitz  # PyMuPDF
from docx import Document as DocxDocument
from striprtf.striprtf import rtf_to_text

from legal_mcp import config

logger = logging.getLogger(__name__)

_anthropic_client: anthropic.Anthropic | None = None


def _get_anthropic() -> anthropic.Anthropic:
    global _anthropic_client
    if _anthropic_client is None:
        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
    return _anthropic_client


async def extract_text(file_path: str) -> tuple[str, int]:
    """Extract text from a document file.

    Returns:
        Tuple of (extracted_text, page_count).
        page_count is 0 for non-PDF files.
    """
    path = Path(file_path)
    suffix = path.suffix.lower()

    if suffix == ".pdf":
        return await _extract_pdf(path)
    elif suffix == ".docx":
        return _extract_docx(path), 0
    elif suffix == ".rtf":
        return _extract_rtf(path), 0
    elif suffix == ".txt":
        return path.read_text(encoding="utf-8"), 0
    else:
        raise ValueError(f"Unsupported file type: {suffix}")


async def _extract_pdf(path: Path) -> tuple[str, int]:
    """Extract text from a PDF. Try direct text first, fall back to Claude Vision for scanned pages."""
    doc = fitz.open(str(path))
    page_count = len(doc)
    pages_text: list[str] = []

    for page_num in range(page_count):
        page = doc[page_num]
        # Try direct text extraction first
        text = page.get_text().strip()

        if len(text) > 50:
            # Sufficient text found - born-digital page
            pages_text.append(text)
            logger.debug("Page %d: direct text extraction (%d chars)", page_num + 1, len(text))
        else:
            # Likely scanned - use Claude Vision
            logger.info("Page %d: using Claude Vision OCR", page_num + 1)
            pix = page.get_pixmap(dpi=200)
            img_bytes = pix.tobytes("png")
            ocr_text = await _ocr_with_claude(img_bytes, page_num + 1)
            pages_text.append(ocr_text)

    doc.close()
    return "\n\n".join(pages_text), page_count


async def _ocr_with_claude(image_bytes: bytes, page_num: int) -> str:
    """OCR a single page image using the Claude Vision API."""
    client = _get_anthropic()
    b64_image = base64.b64encode(image_bytes).decode("utf-8")

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": b64_image,
                        },
                    },
                    {
                        "type": "text",
                        "text": (
                            "חלץ את כל הטקסט מהתמונה הזו. זהו מסמך משפטי בעברית. "
                            "שמור על מבנה הפסקאות המקורי. "
                            "החזר רק את הטקסט המחולץ, ללא הערות נוספות."
                        ),
                    },
                ],
            }
        ],
    )
    return message.content[0].text


def _extract_docx(path: Path) -> str:
    """Extract text from a DOCX file."""
    doc = DocxDocument(str(path))
    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
    return "\n\n".join(paragraphs)


def _extract_rtf(path: Path) -> str:
    """Extract text from an RTF file."""
    rtf_content = path.read_text(encoding="utf-8", errors="replace")
    return rtf_to_text(rtf_content)
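The 50-character threshold in `_extract_pdf` is the routing heuristic: pages with at least that much directly extractable text are treated as born-digital, anything less is rendered to PNG and sent through Claude Vision OCR. Minimal usage (file path invented):

```python
import asyncio
from legal_mcp.services.extractor import extract_text

async def demo():
    text, pages = await extract_text("/tmp/appeal.pdf")  # hypothetical file
    print(f"{pages} pages, {len(text)} chars extracted")

asyncio.run(demo())
```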
mcp-server/src/legal_mcp/services/processor.py (new file, 79 lines)
@@ -0,0 +1,79 @@
"""Document processing pipeline: extract → chunk → embed → store."""

from __future__ import annotations

import logging
from uuid import UUID

from legal_mcp.services import chunker, db, embeddings, extractor

logger = logging.getLogger(__name__)


async def process_document(document_id: UUID, case_id: UUID) -> dict:
    """Full processing pipeline for a document.

    1. Extract text from the file
    2. Split into chunks
    3. Generate embeddings
    4. Store chunks + embeddings in the DB

    Returns a processing summary.
    """
    doc = await db.get_document(document_id)
    if not doc:
        raise ValueError(f"Document {document_id} not found")

    await db.update_document(document_id, extraction_status="processing")

    try:
        # Step 1: Extract text
        logger.info("Extracting text from %s", doc["file_path"])
        text, page_count = await extractor.extract_text(doc["file_path"])

        await db.update_document(
            document_id,
            extracted_text=text,
            page_count=page_count,
        )

        # Step 2: Chunk
        logger.info("Chunking document (%d chars)", len(text))
        chunks = chunker.chunk_document(text)

        if not chunks:
            await db.update_document(document_id, extraction_status="completed")
            return {"status": "completed", "chunks": 0, "message": "No text to chunk"}

        # Step 3: Embed
        logger.info("Generating embeddings for %d chunks", len(chunks))
        texts = [c.content for c in chunks]
        embs = await embeddings.embed_texts(texts, input_type="document")

        # Step 4: Store
        chunk_dicts = [
            {
                "content": c.content,
                "section_type": c.section_type,
                "embedding": emb,
                "page_number": c.page_number,
                "chunk_index": c.chunk_index,
            }
            for c, emb in zip(chunks, embs)
        ]

        stored = await db.store_chunks(document_id, case_id, chunk_dicts)
        await db.update_document(document_id, extraction_status="completed")

        logger.info("Document processed: %d chunks stored", stored)
        return {
            "status": "completed",
            "chunks": stored,
            "pages": page_count,
            "text_length": len(text),
        }

    except Exception as e:
        logger.exception("Document processing failed: %s", e)
        await db.update_document(document_id, extraction_status="failed")
        return {"status": "failed", "error": str(e)}
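A caller is expected to register the file first and then hand the IDs to the pipeline; roughly (sketch, title and path invented):

```python
from uuid import UUID
from legal_mcp.services import db, processor

async def ingest(case_id: UUID):
    doc = await db.create_document(
        case_id, doc_type="appeal", title="כתב ערר", file_path="/tmp/appeal.pdf",
    )
    summary = await processor.process_document(UUID(doc["id"]), case_id)
    print(summary)  # e.g. {"status": "completed", "chunks": 12, "pages": 5, ...}
```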
121
mcp-server/src/legal_mcp/services/style_analyzer.py
Normal file
121
mcp-server/src/legal_mcp/services/style_analyzer.py
Normal file
@@ -0,0 +1,121 @@
"""Style analyzer - extracts writing patterns from Dafna's decision corpus."""

from __future__ import annotations

import json
import logging
import re

import anthropic

from legal_mcp import config
from legal_mcp.services import db

logger = logging.getLogger(__name__)


ANALYSIS_PROMPT = """\
אתה מנתח סגנון כתיבה משפטית. לפניך החלטות משפטיות שנכתבו על ידי אותה יושבת ראש של ועדת ערר.

נתח את ההחלטות וחלץ את דפוסי הכתיבה הבאים:

1. **נוסחאות פתיחה** (opening_formula) - איך מתחילות ההחלטות
2. **ביטויי מעבר** (transition) - ביטויים שמחברים בין חלקי ההחלטה
3. **סגנון ציטוט** (citation_style) - איך מצטטים חקיקה ופסיקה
4. **מבנה ניתוח** (analysis_structure) - איך בנוי הניתוח המשפטי
5. **נוסחאות סיום** (closing_formula) - איך מסתיימות ההחלטות
6. **ביטויים אופייניים** (characteristic_phrase) - ביטויים ייחודיים שחוזרים

לכל דפוס, תן:
- הטקסט המדויק של הדפוס
- הקשר (באיזה חלק של ההחלטה הוא מופיע)
- דוגמה מתוך הטקסט

החזר את התוצאות בפורמט הבא (JSON array):
```json
[
  {{
    "type": "opening_formula",
    "text": "לפניי ערר על החלטת...",
    "context": "פתיחת ההחלטה",
    "example": "לפניי ערר על החלטת הוועדה המקומית לתכנון ובניה ירושלים"
  }}
]
```

ההחלטות:
{decisions}
"""


async def analyze_corpus() -> dict:
    """Analyze the style corpus and extract/update patterns.

    Returns summary of patterns found.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT full_text, decision_number FROM style_corpus ORDER BY decision_date DESC LIMIT 20"
        )

    if not rows:
        return {"error": "אין החלטות בקורפוס. העלה החלטות קודמות תחילה."}

    # Prepare text for analysis
    decisions_text = ""
    for row in rows:
        decisions_text += f"\n\n--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
        # Limit each decision to ~3000 chars to fit context
        text = row["full_text"]
        if len(text) > 3000:
            text = text[:1500] + "\n...\n" + text[-1500:]
        decisions_text += text

    # Call Claude to analyze patterns
    client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=16384,
        messages=[
            {
                "role": "user",
                "content": ANALYSIS_PROMPT.format(decisions=decisions_text),
            }
        ],
    )

    response_text = message.content[0].text

    # Extract JSON from response - prefer code-block fenced JSON
    code_block = re.search(r"```(?:json)?\s*(\[[\s\S]*?\])\s*```", response_text)
    if code_block:
        json_str = code_block.group(1)
    else:
        # Fallback: find the last JSON array (skip prose brackets)
        all_arrays = list(re.finditer(r"\[[\s\S]*?\]", response_text))
        if not all_arrays:
            return {"error": "Could not parse analysis results", "raw": response_text}
        json_str = all_arrays[-1].group()

    try:
        patterns = json.loads(json_str)
    except json.JSONDecodeError as e:
        return {"error": f"JSON parse error: {e}", "raw": response_text}

    # Store patterns
    count = 0
    for pattern in patterns:
        await db.upsert_style_pattern(
            pattern_type=pattern.get("type", "other"),
            pattern_text=pattern.get("text", ""),
            context=pattern.get("context", ""),
            examples=[pattern.get("example", "")],
        )
        count += 1

    return {
        "patterns_found": count,
        "decisions_analyzed": len(rows),
        "pattern_types": list({p.get("type") for p in patterns}),
    }
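The two-stage JSON extraction in analyze_corpus() is easy to verify in isolation: prefer a fenced ```json block, and only fall back to the last bare array in the reply. A small sketch with a made-up model reply, using the same regexes as above:

import json
import re

response_text = 'הסבר קצר\n```json\n[{"type": "opening_formula", "text": "לפניי ערר"}]\n```'

code_block = re.search(r"```(?:json)?\s*(\[[\s\S]*?\])\s*```", response_text)
if code_block:
    json_str = code_block.group(1)
else:
    # Fallback path: take the last bracketed array in the prose.
    arrays = list(re.finditer(r"\[[\s\S]*?\]", response_text))
    json_str = arrays[-1].group()

patterns = json.loads(json_str)
assert patterns[0]["type"] == "opening_formula"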
0
mcp-server/src/legal_mcp/tools/__init__.py
Normal file
177
mcp-server/src/legal_mcp/tools/cases.py
Normal file
@@ -0,0 +1,177 @@
"""MCP tools for case management."""

from __future__ import annotations

import json
import subprocess
from pathlib import Path
from uuid import UUID

from legal_mcp import config
from legal_mcp.services import db


async def case_create(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: str = "",
    notes: str = "",
) -> str:
    """יצירת תיק ערר חדש.

    Args:
        case_number: מספר תיק הערר (לדוגמה: 123-24)
        title: כותרת קצרה של הערר
        appellants: שמות העוררים
        respondents: שמות המשיבים
        subject: נושא הערר
        property_address: כתובת הנכס
        permit_number: מספר היתר
        committee_type: סוג הוועדה (ברירת מחדל: ועדה מקומית)
        hearing_date: תאריך דיון (YYYY-MM-DD)
        notes: הערות
    """
    from datetime import date as date_type

    h_date = None
    if hearing_date:
        h_date = date_type.fromisoformat(hearing_date)

    case = await db.create_case(
        case_number=case_number,
        title=title,
        appellants=appellants,
        respondents=respondents,
        subject=subject,
        property_address=property_address,
        permit_number=permit_number,
        committee_type=committee_type,
        hearing_date=h_date,
        notes=notes,
    )

    # Initialize git repo for the case
    case_dir = config.CASES_DIR / case_number
    case_dir.mkdir(parents=True, exist_ok=True)
    (case_dir / "documents").mkdir(exist_ok=True)
    (case_dir / "drafts").mkdir(exist_ok=True)

    # Save case metadata
    case_json = case_dir / "case.json"
    case_json.write_text(json.dumps(case, default=str, ensure_ascii=False, indent=2))

    # Create notes file
    notes_file = case_dir / "notes.md"
    notes_file.write_text(f"# הערות - תיק {case_number}\n\n{notes}\n")

    # Initialize git repo
    subprocess.run(["git", "init"], cwd=case_dir, capture_output=True)
    subprocess.run(["git", "add", "."], cwd=case_dir, capture_output=True)
    subprocess.run(
        ["git", "commit", "-m", f"אתחול תיק {case_number}: {title}"],
        cwd=case_dir,
        capture_output=True,
        env={"GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local",
             "GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local",
             "PATH": "/usr/bin:/bin"},
    )

    return json.dumps(case, default=str, ensure_ascii=False, indent=2)


async def case_list(status: str = "", limit: int = 50) -> str:
    """רשימת תיקי ערר עם אפשרות סינון לפי סטטוס.

    Args:
        status: סינון לפי סטטוס (new, in_progress, drafted, reviewed, final). ריק = הכל
        limit: מספר תוצאות מקסימלי
    """
    cases = await db.list_cases(status=status or None, limit=limit)
    if not cases:
        return "אין תיקים."
    return json.dumps(cases, default=str, ensure_ascii=False, indent=2)


async def case_get(case_number: str) -> str:
    """קבלת פרטי תיק מלאים כולל רשימת מסמכים.

    Args:
        case_number: מספר תיק הערר
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    docs = await db.list_documents(UUID(case["id"]))
    case["documents"] = docs
    return json.dumps(case, default=str, ensure_ascii=False, indent=2)


async def case_update(
    case_number: str,
    status: str = "",
    title: str = "",
    subject: str = "",
    notes: str = "",
    hearing_date: str = "",
    decision_date: str = "",
    tags: list[str] | None = None,
) -> str:
    """עדכון פרטי תיק.

    Args:
        case_number: מספר תיק הערר
        status: סטטוס חדש (new, in_progress, drafted, reviewed, final)
        title: כותרת חדשה
        subject: נושא חדש
        notes: הערות חדשות
        hearing_date: תאריך דיון (YYYY-MM-DD)
        decision_date: תאריך החלטה (YYYY-MM-DD)
        tags: תגיות
    """
    from datetime import date as date_type

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    fields = {}
    if status:
        fields["status"] = status
    if title:
        fields["title"] = title
    if subject:
        fields["subject"] = subject
    if notes:
        fields["notes"] = notes
    if hearing_date:
        fields["hearing_date"] = date_type.fromisoformat(hearing_date)
    if decision_date:
        fields["decision_date"] = date_type.fromisoformat(decision_date)
    if tags is not None:
        fields["tags"] = tags

    updated = await db.update_case(UUID(case["id"]), **fields)

    # Git commit the update
    case_dir = config.CASES_DIR / case_number
    if case_dir.exists():
        case_json = case_dir / "case.json"
        case_json.write_text(json.dumps(updated, default=str, ensure_ascii=False, indent=2))
        subprocess.run(["git", "add", "case.json"], cwd=case_dir, capture_output=True)
        subprocess.run(
            ["git", "commit", "-m", f"עדכון תיק: {', '.join(fields.keys())}"],
            cwd=case_dir,
            capture_output=True,
            env={"GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local",
                 "GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local",
                 "PATH": "/usr/bin:/bin"},
        )

    return json.dumps(updated, default=str, ensure_ascii=False, indent=2)
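One detail of case_create() and case_update() worth calling out: passing env= to subprocess.run replaces the inherited environment entirely, so commits get a fixed identity instead of whatever global git config the operator has. A standalone sketch of the same pattern, assuming git is reachable via /usr/bin:/bin:

import subprocess
import tempfile
from pathlib import Path

repo = Path(tempfile.mkdtemp())
env = {
    "GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local",
    "GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local",
    "PATH": "/usr/bin:/bin",
}
subprocess.run(["git", "init"], cwd=repo, capture_output=True)
(repo / "case.json").write_text("{}")
subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
done = subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True, env=env)
print(done.returncode)  # 0 when the commit was created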
218
mcp-server/src/legal_mcp/tools/documents.py
Normal file
@@ -0,0 +1,218 @@
"""MCP tools for document management and processing."""

from __future__ import annotations

import json
import shutil
import subprocess
from pathlib import Path
from uuid import UUID

from legal_mcp import config
from legal_mcp.services import db, processor


async def document_upload(
    case_number: str,
    file_path: str,
    doc_type: str = "appeal",
    title: str = "",
) -> str:
    """העלאה ועיבוד מסמך לתיק ערר. מחלץ טקסט, יוצר chunks ו-embeddings.

    Args:
        case_number: מספר תיק הערר
        file_path: נתיב מלא לקובץ (PDF, DOCX, RTF, TXT)
        doc_type: סוג מסמך (appeal=כתב ערר, response=תשובה, decision=החלטה, reference=מסמך עזר, exhibit=נספח)
        title: שם המסמך (אם ריק, ייקח משם הקובץ)
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    source = Path(file_path)
    if not source.exists():
        return f"קובץ לא נמצא: {file_path}"

    case_id = UUID(case["id"])
    if not title:
        title = source.stem

    # Copy file to case directory
    case_dir = config.CASES_DIR / case_number / "documents"
    case_dir.mkdir(parents=True, exist_ok=True)
    dest = case_dir / source.name
    shutil.copy2(str(source), str(dest))

    # Create document record
    doc = await db.create_document(
        case_id=case_id,
        doc_type=doc_type,
        title=title,
        file_path=str(dest),
    )

    # Process document (extract → chunk → embed → store)
    result = await processor.process_document(UUID(doc["id"]), case_id)

    # Git commit
    repo_dir = config.CASES_DIR / case_number
    if repo_dir.exists():
        subprocess.run(["git", "add", "."], cwd=repo_dir, capture_output=True)
        doc_type_hebrew = {
            "appeal": "כתב ערר",
            "response": "תשובה",
            "decision": "החלטה",
            "reference": "מסמך עזר",
            "exhibit": "נספח",
        }.get(doc_type, doc_type)
        subprocess.run(
            ["git", "commit", "-m", f"הוספת {doc_type_hebrew}: {title}"],
            cwd=repo_dir,
            capture_output=True,
            env={"GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local",
                 "GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local",
                 "PATH": "/usr/bin:/bin"},
        )

    return json.dumps({
        "document": doc,
        "processing": result,
    }, default=str, ensure_ascii=False, indent=2)


async def document_upload_training(
    file_path: str,
    decision_number: str = "",
    decision_date: str = "",
    subject_categories: list[str] | None = None,
    title: str = "",
) -> str:
    """העלאת החלטה קודמת של דפנה לקורפוס הסגנון (training).

    Args:
        file_path: נתיב מלא לקובץ ההחלטה
        decision_number: מספר ההחלטה
        decision_date: תאריך ההחלטה (YYYY-MM-DD)
        subject_categories: קטגוריות - אפשר לבחור כמה (בנייה, שימוש חורג, תכנית, היתר, הקלה, חלוקה, תמ"א 38, היטל השבחה, פיצויים 197)
        title: שם המסמך
    """
    from datetime import date as date_type

    from legal_mcp.services import extractor, embeddings, chunker

    source = Path(file_path)
    if not source.exists():
        return f"קובץ לא נמצא: {file_path}"

    if not title:
        title = source.stem

    # Copy to training directory (skip if already there)
    config.TRAINING_DIR.mkdir(parents=True, exist_ok=True)
    dest = config.TRAINING_DIR / source.name
    if source.resolve() != dest.resolve():
        shutil.copy2(str(source), str(dest))

    # Extract text
    text, page_count = await extractor.extract_text(str(dest))

    # Parse date
    d_date = None
    if decision_date:
        d_date = date_type.fromisoformat(decision_date)

    # Add to style corpus
    corpus_id = await db.add_to_style_corpus(
        document_id=None,
        decision_number=decision_number,
        decision_date=d_date,
        subject_categories=subject_categories or [],
        full_text=text,
    )

    # Chunk and embed for RAG search over training corpus
    chunks = chunker.chunk_document(text)
    if chunks:
        # Create a document record (no case association)
        doc = await db.create_document(
            case_id=None,
            doc_type="decision",
            title=f"[קורפוס] {title}",
            file_path=str(dest),
            page_count=page_count,
        )
        doc_id = UUID(doc["id"])
        await db.update_document(doc_id, extracted_text=text, extraction_status="completed")

        # Generate embeddings and store chunks
        texts = [c.content for c in chunks]
        embs = await embeddings.embed_texts(texts, input_type="document")
        chunk_dicts = [
            {
                "content": c.content,
                "section_type": c.section_type,
                "embedding": emb,
                "page_number": c.page_number,
                "chunk_index": c.chunk_index,
            }
            for c, emb in zip(chunks, embs)
        ]
        await db.store_chunks(doc_id, None, chunk_dicts)

    return json.dumps({
        "corpus_id": str(corpus_id),
        "title": title,
        "pages": page_count,
        "text_length": len(text),
        "chunks": len(chunks) if chunks else 0,
    }, default=str, ensure_ascii=False, indent=2)


async def document_get_text(case_number: str, doc_title: str = "") -> str:
    """קבלת טקסט מלא של מסמך מתוך תיק.

    Args:
        case_number: מספר תיק הערר
        doc_title: שם המסמך (אם ריק, מחזיר את כל המסמכים)
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    docs = await db.list_documents(UUID(case["id"]))
    if not docs:
        return f"אין מסמכים בתיק {case_number}."

    if doc_title:
        docs = [d for d in docs if doc_title.lower() in d["title"].lower()]
        if not docs:
            return f"מסמך '{doc_title}' לא נמצא בתיק."

    results = []
    for doc in docs:
        text = await db.get_document_text(UUID(doc["id"]))
        results.append({
            "title": doc["title"],
            "doc_type": doc["doc_type"],
            "text": text[:10000] if text else "(ללא טקסט)",
        })

    return json.dumps(results, ensure_ascii=False, indent=2)


async def document_list(case_number: str) -> str:
    """רשימת מסמכים בתיק.

    Args:
        case_number: מספר תיק הערר
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    docs = await db.list_documents(UUID(case["id"]))
    if not docs:
        return f"אין מסמכים בתיק {case_number}."

    return json.dumps(docs, default=str, ensure_ascii=False, indent=2)
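Note that the title filter in document_get_text() relies on str.lower(), which is a no-op for Hebrew, so case-insensitivity only kicks in for Latin-script titles. A quick illustration with stub records:

docs = [
    {"title": "Appeal-123-24.pdf"},
    {"title": "תשובת הוועדה המקומית"},
]
doc_title = "appeal"
matches = [d for d in docs if doc_title.lower() in d["title"].lower()]
assert len(matches) == 1  # matches the Latin-script title despite the capital A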
202
mcp-server/src/legal_mcp/tools/drafting.py
Normal file
@@ -0,0 +1,202 @@
"""MCP tools for decision drafting support."""

from __future__ import annotations

import json
from uuid import UUID

from legal_mcp.services import db, embeddings


DECISION_TEMPLATE = """# החלטה

## בפני: דפנה תמיר, יו"ר ועדת הערר מחוז ירושלים

**ערר מספר:** {case_number}
**נושא:** {subject}
**העוררים:** {appellants}
**המשיבים:** {respondents}
**כתובת הנכס:** {property_address}

---

## א. רקע עובדתי

[תיאור הרקע העובדתי של הערר]

## ב. טענות העוררים

[סיכום טענות העוררים]

## ג. טענות המשיבים

[סיכום טענות המשיבים]

## ד. דיון והכרעה

[ניתוח משפטי]

## ה. מסקנה

[מסקנת הוועדה]

## ו. החלטה

[ההחלטה הסופית]

---

ניתנה היום, {date}
דפנה תמיר, יו"ר ועדת הערר
"""


async def get_style_guide() -> str:
    """שליפת דפוסי הסגנון של דפנה - נוסחאות, ביטויים אופייניים ומבנה."""
    patterns = await db.get_style_patterns()

    if not patterns:
        return "לא נמצאו דפוסי סגנון. יש להעלות החלטות קודמות ולהריץ ניתוח סגנון (/style-report)."

    grouped: dict[str, list] = {}
    for p in patterns:
        pt = p["pattern_type"]
        if pt not in grouped:
            grouped[pt] = []
        grouped[pt].append({
            "text": p["pattern_text"],
            "context": p["context"],
            "frequency": p["frequency"],
        })

    type_names = {
        "opening_formula": "נוסחאות פתיחה",
        "transition": "ביטויי מעבר",
        "citation_style": "סגנון ציטוט",
        "analysis_structure": "מבנה ניתוח",
        "closing_formula": "נוסחאות סיום",
        "characteristic_phrase": "ביטויים אופייניים",
    }

    result = "# מדריך סגנון - דפנה תמיר\n\n"
    for ptype, items in grouped.items():
        result += f"## {type_names.get(ptype, ptype)}\n\n"
        for item in items:
            result += f"- **{item['text']}** ({item['context']}, תדירות: {item['frequency']})\n"
        result += "\n"

    return result


async def draft_section(
    case_number: str,
    section: str,
    instructions: str = "",
) -> str:
    """הרכבת הקשר מלא לניסוח סעיף בהחלטה - כולל עובדות מהמסמכים, תקדימים רלוונטיים ודפוסי סגנון.

    Args:
        case_number: מספר תיק הערר
        section: סוג הסעיף (facts, appellant_claims, respondent_claims, legal_analysis, conclusion, ruling)
        instructions: הנחיות נוספות לניסוח
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    case_id = UUID(case["id"])

    # 1. Get relevant chunks from case documents
    section_query = {
        "facts": "רקע עובדתי של התיק",
        "appellant_claims": "טענות העוררים",
        "respondent_claims": "טענות המשיבים",
        "legal_analysis": "ניתוח משפטי ודיון",
        "conclusion": "מסקנות",
        "ruling": "החלטה",
    }.get(section, section)

    query_emb = await embeddings.embed_query(section_query)
    case_chunks = await db.search_similar(
        query_embedding=query_emb, limit=10, case_id=case_id
    )

    # 2. Get similar sections from precedents
    precedent_chunks = await db.search_similar(
        query_embedding=query_emb, limit=5, section_type=section
    )
    # Filter out chunks from the same case
    precedent_chunks = [c for c in precedent_chunks if str(c["case_id"]) != case["id"]]

    # 3. Get style patterns
    style_patterns = await db.get_style_patterns()

    # Build context
    context = {
        "case": {
            "case_number": case["case_number"],
            "title": case["title"],
            "appellants": case["appellants"],
            "respondents": case["respondents"],
            "subject": case["subject"],
            "property_address": case["property_address"],
        },
        "section": section,
        "instructions": instructions,
        "case_documents": [
            {
                "document": c["document_title"],
                "section_type": c["section_type"],
                "content": c["content"],
            }
            for c in case_chunks
        ],
        "precedents": [
            {
                "case_number": c["case_number"],
                "document": c["document_title"],
                "content": c["content"][:500],
            }
            for c in precedent_chunks[:3]
        ],
        "style_patterns": [
            {
                "type": p["pattern_type"],
                "text": p["pattern_text"],
            }
            for p in style_patterns[:15]
        ],
    }

    return json.dumps(context, ensure_ascii=False, indent=2)


async def get_decision_template(case_number: str) -> str:
    """קבלת תבנית מבנית להחלטה מלאה עם פרטי התיק.

    Args:
        case_number: מספר תיק הערר
    """
    from datetime import date

    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    template = DECISION_TEMPLATE.format(
        case_number=case["case_number"],
        subject=case["subject"],
        appellants=", ".join(case.get("appellants", [])),
        respondents=", ".join(case.get("respondents", [])),
        property_address=case.get("property_address", ""),
        date=date.today().strftime("%d.%m.%Y"),
    )

    return template


async def analyze_style() -> str:
    """הרצת ניתוח סגנון על קורפוס ההחלטות של דפנה. מחלץ דפוסי כתיבה ושומר אותם."""
    from legal_mcp.services.style_analyzer import analyze_corpus

    result = await analyze_corpus()
    return json.dumps(result, ensure_ascii=False, indent=2)
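get_decision_template() is plain str.format() over the big template. A trimmed stand-in (not the real DECISION_TEMPLATE) shows the shape of the call, including the joined party lists and the dd.mm.yyyy date:

from datetime import date

template = "**ערר מספר:** {case_number}\n**העוררים:** {appellants}\nניתנה היום, {date}"
case = {"case_number": "123-24", "appellants": ["פלוני", "אלמונית"]}
print(template.format(
    case_number=case["case_number"],
    appellants=", ".join(case.get("appellants", [])),
    date=date.today().strftime("%d.%m.%Y"),
))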
124
mcp-server/src/legal_mcp/tools/search.py
Normal file
@@ -0,0 +1,124 @@
"""MCP tools for RAG search over legal documents and decisions."""

from __future__ import annotations

import json
from uuid import UUID

from legal_mcp.services import db, embeddings


async def search_decisions(
    query: str,
    limit: int = 10,
    section_type: str = "",
) -> str:
    """חיפוש סמנטי בהחלטות קודמות ובמסמכים.

    Args:
        query: שאילתת חיפוש בעברית (לדוגמה: "שימוש חורג למסחר באזור מגורים")
        limit: מספר תוצאות מקסימלי
        section_type: סינון לפי סוג סעיף (facts, legal_analysis, conclusion, ruling, וכו'). ריק = הכל
    """
    query_emb = await embeddings.embed_query(query)
    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        section_type=section_type or None,
    )

    if not results:
        return "לא נמצאו תוצאות."

    formatted = []
    for r in results:
        formatted.append({
            "score": round(float(r["score"]), 4),
            "case_number": r["case_number"],
            "document": r["document_title"],
            "section": r["section_type"],
            "page": r["page_number"],
            "content": r["content"],
        })

    return json.dumps(formatted, ensure_ascii=False, indent=2)


async def search_case_documents(
    case_number: str,
    query: str,
    limit: int = 10,
) -> str:
    """חיפוש סמנטי בתוך מסמכי תיק ספציפי.

    Args:
        case_number: מספר תיק הערר
        query: שאילתת חיפוש
        limit: מספר תוצאות מקסימלי
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    query_emb = await embeddings.embed_query(query)
    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        case_id=UUID(case["id"]),
    )

    if not results:
        return f"לא נמצאו תוצאות בתיק {case_number}."

    formatted = []
    for r in results:
        formatted.append({
            "score": round(float(r["score"]), 4),
            "document": r["document_title"],
            "section": r["section_type"],
            "page": r["page_number"],
            "content": r["content"],
        })

    return json.dumps(formatted, ensure_ascii=False, indent=2)


async def find_similar_cases(
    description: str,
    limit: int = 5,
) -> str:
    """מציאת תיקים דומים על בסיס תיאור.

    Args:
        description: תיאור התיק או הנושא (לדוגמה: "ערר על סירוב להיתר בנייה לתוספת קומה")
        limit: מספר תוצאות מקסימלי
    """
    query_emb = await embeddings.embed_query(description)
    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit * 3,  # Get more to deduplicate by case
    )

    if not results:
        return "לא נמצאו תיקים דומים."

    # Deduplicate by case_number, keep best score per case
    seen_cases = {}
    for r in results:
        cn = r["case_number"]
        if cn not in seen_cases or r["score"] > seen_cases[cn]["score"]:
            seen_cases[cn] = r

    # Sort by score and limit
    top_cases = sorted(seen_cases.values(), key=lambda x: x["score"], reverse=True)[:limit]

    formatted = []
    for r in top_cases:
        formatted.append({
            "score": round(float(r["score"]), 4),
            "case_number": r["case_number"],
            "document": r["document_title"],
            "relevant_section": r["content"][:500],
        })

    return json.dumps(formatted, ensure_ascii=False, indent=2)
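The dedup step in find_similar_cases() keeps only the best-scoring chunk per case before re-sorting. The same logic runs standalone on stub rows:

results = [
    {"case_number": "101-23", "score": 0.71},
    {"case_number": "204-24", "score": 0.66},
    {"case_number": "101-23", "score": 0.83},
]
seen_cases = {}
for r in results:
    cn = r["case_number"]
    if cn not in seen_cases or r["score"] > seen_cases[cn]["score"]:
        seen_cases[cn] = r
top_cases = sorted(seen_cases.values(), key=lambda x: x["score"], reverse=True)
assert top_cases[0] == {"case_number": "101-23", "score": 0.83}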
118
mcp-server/src/legal_mcp/tools/workflow.py
Normal file
@@ -0,0 +1,118 @@
"""MCP tools for workflow status tracking."""

from __future__ import annotations

import json
from uuid import UUID

from legal_mcp.services import db


async def workflow_status(case_number: str) -> str:
    """סטטוס תהליך עבודה מלא לתיק - מסמכים, עיבוד, טיוטות.

    Args:
        case_number: מספר תיק הערר
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    case_id = UUID(case["id"])
    docs = await db.list_documents(case_id)

    # Count chunks per document
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        chunk_counts = await conn.fetch(
            "SELECT document_id, COUNT(*) as count FROM document_chunks WHERE case_id = $1 GROUP BY document_id",
            case_id,
        )
    chunk_map = {str(r["document_id"]): r["count"] for r in chunk_counts}

    doc_status = []
    for doc in docs:
        doc_status.append({
            "title": doc["title"],
            "type": doc["doc_type"],
            "extraction": doc["extraction_status"],
            "chunks": chunk_map.get(doc["id"], 0),
            "pages": doc.get("page_count"),
        })

    # Check draft status
    from legal_mcp import config

    case_dir = config.CASES_DIR / case_number
    draft_path = case_dir / "drafts" / "decision.md"
    has_draft = draft_path.exists()
    draft_size = draft_path.stat().st_size if has_draft else 0

    status = {
        "case_number": case["case_number"],
        "title": case["title"],
        "status": case["status"],
        "documents": doc_status,
        "total_documents": len(docs),
        "total_chunks": sum(chunk_map.values()),
        "has_draft": has_draft,
        "draft_size_bytes": draft_size,
        "next_steps": _suggest_next_steps(case, docs, has_draft),
    }

    return json.dumps(status, ensure_ascii=False, indent=2)


def _suggest_next_steps(case: dict, docs: list, has_draft: bool) -> list[str]:
    """Suggest next steps based on case state."""
    steps = []
    doc_types = {d["doc_type"] for d in docs}

    if not docs:
        steps.append("העלה מסמכים לתיק (כתב ערר, תשובת ועדה)")
    else:
        if "appeal" not in doc_types:
            steps.append("העלה כתב ערר")
        if "response" not in doc_types:
            steps.append("העלה תשובת ועדה/משיבים")

    pending = [d for d in docs if d["extraction_status"] == "pending"]
    if pending:
        steps.append(f"עיבוד {len(pending)} מסמכים ממתינים")

    if docs and not has_draft:
        steps.append("התחל ניסוח טיוטת החלטה (/draft-decision)")
    elif has_draft and case["status"] in ("new", "in_progress"):
        steps.append("סקור ועדכן את הטיוטה")
        steps.append("עדכן סטטוס ל-drafted")

    if case["status"] == "drafted":
        steps.append("סקירה סופית ועדכון סטטוס ל-reviewed")
    elif case["status"] == "reviewed":
        steps.append("אישור סופי ועדכון סטטוס ל-final")

    return steps


async def processing_status() -> str:
    """סטטוס כללי - מספר תיקים, מסמכים ממתינים לעיבוד."""
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        case_count = await conn.fetchval("SELECT COUNT(*) FROM cases")
        doc_count = await conn.fetchval("SELECT COUNT(*) FROM documents")
        pending_count = await conn.fetchval(
            "SELECT COUNT(*) FROM documents WHERE extraction_status = 'pending'"
        )
        chunk_count = await conn.fetchval("SELECT COUNT(*) FROM document_chunks")
        corpus_count = await conn.fetchval("SELECT COUNT(*) FROM style_corpus")
        pattern_count = await conn.fetchval("SELECT COUNT(*) FROM style_patterns")

    return json.dumps({
        "cases": case_count,
        "documents": doc_count,
        "pending_processing": pending_count,
        "chunks": chunk_count,
        "style_corpus_entries": corpus_count,
        "style_patterns": pattern_count,
    }, ensure_ascii=False, indent=2)
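workflow_status() folds the per-document GROUP BY rows into a dict keyed by stringified document id; total_chunks is just the sum. With plain dicts standing in for asyncpg records:

chunk_counts = [{"document_id": "aaa", "count": 12}, {"document_id": "bbb", "count": 7}]
chunk_map = {str(r["document_id"]): r["count"] for r in chunk_counts}
assert sum(chunk_map.values()) == 19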
342
web/app.py
Normal file
@@ -0,0 +1,342 @@
"""Ezer Mishpati — Web upload interface for legal documents."""

from __future__ import annotations

import asyncio
import json
import logging
import re
import shutil
import subprocess
import sys
import time
from contextlib import asynccontextmanager
from pathlib import Path
from uuid import UUID, uuid4

# Allow importing legal_mcp from the MCP server source
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from legal_mcp import config
from legal_mcp.services import chunker, db, embeddings, extractor, processor

logger = logging.getLogger(__name__)

UPLOAD_DIR = config.DATA_DIR / "uploads"
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".rtf", ".txt"}
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB

# In-memory progress tracking
_progress: dict[str, dict] = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    await db.init_schema()
    yield
    await db.close_pool()


app = FastAPI(title="Ezer Mishpati — Upload", lifespan=lifespan)

STATIC_DIR = Path(__file__).parent / "static"


# ── API Endpoints ──────────────────────────────────────────────────


@app.get("/")
async def index():
    return FileResponse(STATIC_DIR / "index.html")


@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a file to the temporary uploads directory."""
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    # Validate extension
    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"Unsupported file type: {ext}. Allowed: {', '.join(ALLOWED_EXTENSIONS)}")

    # Sanitize filename
    safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem)
    if not safe_name:
        safe_name = "document"
    timestamp = int(time.time())
    filename = f"{timestamp}_{safe_name}{ext}"

    # Read and validate size
    content = await file.read()
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"File too large. Max: {MAX_FILE_SIZE // (1024*1024)}MB")

    dest = UPLOAD_DIR / filename
    dest.write_bytes(content)

    return {
        "filename": filename,
        "original_name": file.filename,
        "size": len(content),
    }


@app.get("/api/uploads")
async def list_uploads():
    """List files in the uploads (pending) directory."""
    if not UPLOAD_DIR.exists():
        return []
    files = []
    for f in sorted(UPLOAD_DIR.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True):
        if f.is_file() and f.suffix.lower() in ALLOWED_EXTENSIONS:
            stat = f.stat()
            files.append({
                "filename": f.name,
                "size": stat.st_size,
                "uploaded_at": stat.st_mtime,
            })
    return files


@app.delete("/api/uploads/{filename}")
async def delete_upload(filename: str):
    """Remove a file from the uploads directory."""
    path = UPLOAD_DIR / filename
    if not path.exists() or not path.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "File not found")
    path.unlink()
    return {"deleted": filename}


class ClassifyRequest(BaseModel):
    filename: str
    category: str  # "training" or "case"
    # For case documents
    case_number: str = ""
    doc_type: str = "appeal"
    title: str = ""
    # For training documents
    decision_number: str = ""
    decision_date: str = ""
    subject_categories: list[str] = []


@app.post("/api/classify")
async def classify_file(req: ClassifyRequest):
    """Classify a pending file and start processing."""
    source = UPLOAD_DIR / req.filename
    if not source.exists() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "File not found in uploads")

    if req.category not in ("training", "case"):
        raise HTTPException(400, "Category must be 'training' or 'case'")

    if req.category == "case" and not req.case_number:
        raise HTTPException(400, "case_number required for case documents")

    task_id = str(uuid4())
    _progress[task_id] = {"status": "queued", "filename": req.filename}

    asyncio.create_task(_process_file(task_id, source, req))

    return {"task_id": task_id}


@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
    """SSE stream of processing progress."""
    if task_id not in _progress:
        raise HTTPException(404, "Task not found")

    async def event_stream():
        while True:
            data = _progress.get(task_id, {})
            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
            if data.get("status") in ("completed", "failed"):
                break
            await asyncio.sleep(1)
        # Clean up after a delay
        await asyncio.sleep(30)
        _progress.pop(task_id, None)

    return StreamingResponse(event_stream(), media_type="text/event-stream")


@app.get("/api/cases")
async def list_cases():
    """List existing cases for the dropdown."""
    cases = await db.list_cases()
    return [
        {
            "case_number": c["case_number"],
            "title": c["title"],
            "status": c["status"],
        }
        for c in cases
    ]


# ── Background Processing ─────────────────────────────────────────


async def _process_file(task_id: str, source: Path, req: ClassifyRequest):
    """Process a classified file in the background."""
    try:
        if req.category == "case":
            await _process_case_document(task_id, source, req)
        else:
            await _process_training_document(task_id, source, req)
    except Exception as e:
        logger.exception("Processing failed for %s", req.filename)
        _progress[task_id] = {"status": "failed", "error": str(e), "filename": req.filename}


async def _process_case_document(task_id: str, source: Path, req: ClassifyRequest):
    """Process a case document (mirrors documents.document_upload logic)."""
    _progress[task_id] = {"status": "validating", "filename": req.filename}

    case = await db.get_case_by_number(req.case_number)
    if not case:
        _progress[task_id] = {"status": "failed", "error": f"Case {req.case_number} not found"}
        return

    case_id = UUID(case["id"])
    title = req.title or source.stem.split("_", 1)[-1]  # Remove timestamp prefix

    # Copy to case directory
    _progress[task_id] = {"status": "copying", "filename": req.filename}
    case_dir = config.CASES_DIR / req.case_number / "documents"
    case_dir.mkdir(parents=True, exist_ok=True)
    # Use original name without timestamp prefix
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = case_dir / original_name
    shutil.copy2(str(source), str(dest))

    # Create document record
    _progress[task_id] = {"status": "registering", "filename": req.filename}
    doc = await db.create_document(
        case_id=case_id,
        doc_type=req.doc_type,
        title=title,
        file_path=str(dest),
    )

    # Process (extract → chunk → embed → store)
    _progress[task_id] = {"status": "processing", "filename": req.filename, "step": "extracting"}
    result = await processor.process_document(UUID(doc["id"]), case_id)

    # Git commit
    repo_dir = config.CASES_DIR / req.case_number
    if repo_dir.exists():
        subprocess.run(["git", "add", "."], cwd=repo_dir, capture_output=True)
        doc_type_hebrew = {
            "appeal": "כתב ערר", "response": "תשובה", "decision": "החלטה",
            "reference": "מסמך עזר", "exhibit": "נספח",
        }.get(req.doc_type, req.doc_type)
        subprocess.run(
            ["git", "commit", "-m", f"הוספת {doc_type_hebrew}: {title}"],
            cwd=repo_dir, capture_output=True,
            env={"GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local",
                 "GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local",
                 "PATH": "/usr/bin:/bin"},
        )

    # Remove from uploads
    source.unlink(missing_ok=True)

    _progress[task_id] = {
        "status": "completed",
        "filename": req.filename,
        "result": result,
        "case_number": req.case_number,
        "doc_type": req.doc_type,
    }


async def _process_training_document(task_id: str, source: Path, req: ClassifyRequest):
    """Process a training document (mirrors documents.document_upload_training logic)."""
    from datetime import date as date_type

    title = req.title or source.stem.split("_", 1)[-1]

    # Copy to training directory
    _progress[task_id] = {"status": "copying", "filename": req.filename}
    config.TRAINING_DIR.mkdir(parents=True, exist_ok=True)
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = config.TRAINING_DIR / original_name
    shutil.copy2(str(source), str(dest))

    # Extract text
    _progress[task_id] = {"status": "processing", "filename": req.filename, "step": "extracting"}
    text, page_count = await extractor.extract_text(str(dest))

    # Parse date
    d_date = None
    if req.decision_date:
        d_date = date_type.fromisoformat(req.decision_date)

    # Add to style corpus
    _progress[task_id] = {"status": "processing", "filename": req.filename, "step": "corpus"}
    corpus_id = await db.add_to_style_corpus(
        document_id=None,
        decision_number=req.decision_number,
        decision_date=d_date,
        subject_categories=req.subject_categories,
        full_text=text,
    )

    # Chunk and embed
    _progress[task_id] = {"status": "processing", "filename": req.filename, "step": "chunking"}
    chunks = chunker.chunk_document(text)

    chunk_count = 0
    if chunks:
        doc = await db.create_document(
            case_id=None,
            doc_type="decision",
            title=f"[קורפוס] {title}",
            file_path=str(dest),
            page_count=page_count,
        )
        doc_id = UUID(doc["id"])
        await db.update_document(doc_id, extracted_text=text, extraction_status="completed")

        _progress[task_id] = {"status": "processing", "filename": req.filename, "step": "embedding"}
        texts = [c.content for c in chunks]
        embs = await embeddings.embed_texts(texts, input_type="document")

        chunk_dicts = [
            {
                "content": c.content,
                "section_type": c.section_type,
                "embedding": emb,
                "page_number": c.page_number,
                "chunk_index": c.chunk_index,
            }
            for c, emb in zip(chunks, embs)
        ]
        await db.store_chunks(doc_id, None, chunk_dicts)
        chunk_count = len(chunks)

    # Remove from uploads
    source.unlink(missing_ok=True)

    _progress[task_id] = {
        "status": "completed",
        "filename": req.filename,
        "result": {
            "corpus_id": str(corpus_id),
            "title": title,
            "pages": page_count,
            "text_length": len(text),
            "chunks": chunk_count,
        },
    }
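The /api/progress/{task_id} endpoint speaks plain SSE: one "data:" line per second until the task settles. A minimal stdlib client sketch; the host, port, and task id are placeholders, not values the server guarantees:

import json
import urllib.request

url = "http://localhost:8000/api/progress/" + "TASK-ID-FROM-/api/classify"
with urllib.request.urlopen(url) as resp:
    for raw in resp:  # HTTPResponse is iterable line by line
        line = raw.decode("utf-8").strip()
        if line.startswith("data: "):
            event = json.loads(line[len("data: "):])
            print(event.get("status"), event.get("step", ""))
            if event.get("status") in ("completed", "failed"):
                break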
571
web/static/index.html
Normal file
@@ -0,0 +1,571 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="he" dir="rtl">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>עוזר משפטי — העלאת מסמכים</title>
|
||||||
|
<style>
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body {
|
||||||
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||||
|
background: #f5f6fa;
|
||||||
|
color: #2d3436;
|
||||||
|
direction: rtl;
|
||||||
|
min-height: 100vh;
|
||||||
|
}
|
||||||
|
.container { max-width: 900px; margin: 0 auto; padding: 20px; }
|
||||||
|
header {
|
||||||
|
background: #2d3436;
|
||||||
|
color: white;
|
||||||
|
padding: 20px;
|
||||||
|
text-align: center;
|
||||||
|
margin-bottom: 24px;
|
||||||
|
border-radius: 8px;
|
||||||
|
}
|
||||||
|
header h1 { font-size: 1.5em; font-weight: 600; }
|
||||||
|
header p { opacity: 0.7; margin-top: 4px; font-size: 0.9em; }
|
||||||
|
|
||||||
|
/* Upload Zone */
|
||||||
|
.upload-zone {
|
||||||
|
border: 2px dashed #b2bec3;
|
||||||
|
border-radius: 12px;
|
||||||
|
padding: 40px;
|
||||||
|
text-align: center;
|
||||||
|
background: white;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
margin-bottom: 24px;
|
||||||
|
}
|
||||||
|
.upload-zone:hover, .upload-zone.dragover {
|
||||||
|
border-color: #0984e3;
|
||||||
|
background: #f0f7ff;
|
||||||
|
}
|
||||||
|
.upload-zone h2 { color: #636e72; font-size: 1.1em; margin-bottom: 8px; }
|
||||||
|
.upload-zone p { color: #b2bec3; font-size: 0.85em; }
|
||||||
|
.upload-zone input[type="file"] { display: none; }
|
||||||
|
.upload-progress {
|
||||||
|
margin-top: 12px;
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
.upload-progress .bar {
|
||||||
|
height: 4px;
|
||||||
|
background: #dfe6e9;
|
||||||
|
border-radius: 2px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
.upload-progress .bar-fill {
|
||||||
|
height: 100%;
|
||||||
|
background: #0984e3;
|
||||||
|
width: 0;
|
||||||
|
transition: width 0.3s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Cards */
|
||||||
|
.card {
|
||||||
|
background: white;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 1px 3px rgba(0,0,0,0.08);
|
||||||
|
padding: 20px;
|
||||||
|
margin-bottom: 16px;
|
||||||
|
}
|
||||||
|
.card h3 {
|
||||||
|
font-size: 1em;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
color: #2d3436;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pending Files */
|
||||||
|
.pending-file {
|
||||||
|
border: 1px solid #dfe6e9;
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 16px;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
background: #fafafa;
|
||||||
|
}
|
||||||
|
.pending-file .file-info {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
}
|
||||||
|
.pending-file .file-name {
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 0.95em;
|
||||||
|
word-break: break-all;
|
||||||
|
}
|
||||||
|
.pending-file .file-size {
|
||||||
|
color: #636e72;
|
||||||
|
font-size: 0.8em;
|
||||||
|
white-space: nowrap;
|
||||||
|
margin-right: 12px;
|
||||||
|
}
|
||||||
|
.pending-file .file-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 8px;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Form Elements */
|
||||||
|
.form-row {
|
||||||
|
display: flex;
|
||||||
|
gap: 12px;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
align-items: end;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
}
|
||||||
|
.form-group { flex: 1; min-width: 150px; }
|
||||||
|
.form-group label {
|
||||||
|
display: block;
|
||||||
|
font-size: 0.8em;
|
||||||
|
color: #636e72;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.form-group select, .form-group input {
|
||||||
|
width: 100%;
|
||||||
|
padding: 8px 10px;
|
||||||
|
border: 1px solid #dfe6e9;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
font-family: inherit;
|
||||||
|
direction: rtl;
|
||||||
|
}
|
||||||
|
.radio-group {
|
||||||
|
display: flex;
|
||||||
|
gap: 16px;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
}
|
||||||
|
.radio-group label {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
.conditional { display: none; }
|
||||||
|
.conditional.active { display: block; }
|
||||||
|
|
||||||
|
/* Buttons */
|
||||||
|
.btn {
|
||||||
|
padding: 8px 20px;
|
||||||
|
border: none;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
font-family: inherit;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.15s;
|
||||||
|
}
|
||||||
|
.btn-primary { background: #0984e3; color: white; }
|
||||||
|
.btn-primary:hover { background: #0770c2; }
|
||||||
|
.btn-danger { background: #d63031; color: white; }
|
||||||
|
.btn-danger:hover { background: #b71c1c; }
|
||||||
|
.btn-sm { padding: 5px 12px; font-size: 0.8em; }
|
||||||
|
.btn:disabled { opacity: 0.5; cursor: not-allowed; }
|
||||||
|
|
||||||
|
/* Processing Tasks */
|
||||||
|
.task-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 12px;
|
||||||
|
padding: 12px;
|
||||||
|
border: 1px solid #dfe6e9;
|
||||||
|
border-radius: 8px;
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
.task-item .spinner {
|
||||||
|
width: 20px;
|
||||||
|
height: 20px;
|
||||||
|
border: 2px solid #dfe6e9;
|
||||||
|
border-top-color: #0984e3;
|
||||||
|
border-radius: 50%;
|
||||||
|
animation: spin 0.8s linear infinite;
|
||||||
|
}
|
||||||
|
.task-item.done .spinner {
|
||||||
|
border-color: #00b894;
|
||||||
|
border-top-color: #00b894;
|
||||||
|
animation: none;
|
||||||
|
}
|
||||||
|
.task-item.failed .spinner {
|
||||||
|
border-color: #d63031;
|
||||||
|
border-top-color: #d63031;
|
||||||
|
animation: none;
|
||||||
|
}
|
||||||
|
@keyframes spin { to { transform: rotate(360deg); } }
|
||||||
|
.task-info { flex: 1; }
|
||||||
|
.task-info .task-name { font-size: 0.9em; font-weight: 500; }
|
||||||
|
.task-info .task-status { font-size: 0.8em; color: #636e72; }
|
||||||
|
|
||||||
|
/* Subject checkboxes */
|
||||||
|
.subject-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(3, 1fr);
|
||||||
|
gap: 6px;
|
||||||
|
}
|
||||||
|
.subject-grid label {
|
||||||
|
font-size: 0.8em;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Empty state */
|
||||||
|
.empty { text-align: center; color: #b2bec3; padding: 24px; font-size: 0.9em; }
|
||||||
|
|
||||||
|
/* Toast */
|
||||||
|
.toast {
|
||||||
|
position: fixed;
|
||||||
|
bottom: 20px;
|
||||||
|
left: 50%;
|
||||||
|
transform: translateX(-50%);
|
||||||
|
background: #2d3436;
|
||||||
|
color: white;
|
||||||
|
padding: 12px 24px;
|
||||||
|
border-radius: 8px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
z-index: 1000;
|
||||||
|
opacity: 0;
|
||||||
|
transition: opacity 0.3s;
|
||||||
|
}
|
||||||
|
.toast.show { opacity: 1; }
|
||||||
|
.toast.error { background: #d63031; }
|
||||||
|
.toast.success { background: #00b894; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="container">
|
||||||
|
<header>
|
||||||
|
<h1>עוזר משפטי — העלאת מסמכים</h1>
|
||||||
|
<p>העלאה, סיווג ועיבוד מסמכים משפטיים</p>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<!-- Upload Zone -->
|
||||||
|
<div class="upload-zone" id="dropZone">
|
||||||
|
<h2>גרור קבצים לכאן או לחץ לבחירה</h2>
|
||||||
|
<p>PDF, DOCX, RTF, TXT — עד 50MB</p>
|
||||||
|
<input type="file" id="fileInput" multiple accept=".pdf,.docx,.rtf,.txt">
|
||||||
|
<div class="upload-progress" id="uploadProgress">
|
||||||
|
<div class="bar"><div class="bar-fill" id="uploadBar"></div></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Pending Files -->
|
||||||
|
<div class="card" id="pendingCard" style="display:none">
|
||||||
|
<h3>קבצים ממתינים לסיווג</h3>
|
||||||
|
<div id="pendingList"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Processing Tasks -->
|
||||||
|
<div class="card" id="tasksCard" style="display:none">
|
||||||
|
<h3>עיבוד</h3>
|
||||||
|
<div id="tasksList"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="toast" id="toast"></div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const API = '/api';
|
||||||
|
|
||||||
|
// ── Upload Zone ───────────────────────────────────────────────
|
||||||
|
const dropZone = document.getElementById('dropZone');
|
||||||
|
const fileInput = document.getElementById('fileInput');
|
||||||
|
const uploadProgress = document.getElementById('uploadProgress');
|
||||||
|
const uploadBar = document.getElementById('uploadBar');
|
||||||
|
|
||||||
|
dropZone.addEventListener('click', () => fileInput.click());
|
||||||
|
dropZone.addEventListener('dragover', e => { e.preventDefault(); dropZone.classList.add('dragover'); });
|
||||||
|
dropZone.addEventListener('dragleave', () => dropZone.classList.remove('dragover'));
|
||||||
|
dropZone.addEventListener('drop', e => {
|
||||||
|
e.preventDefault();
|
||||||
|
dropZone.classList.remove('dragover');
|
||||||
|
handleFiles(e.dataTransfer.files);
|
||||||
|
});
|
||||||
|
fileInput.addEventListener('change', () => {
|
||||||
|
if (fileInput.files.length) handleFiles(fileInput.files);
|
||||||
|
fileInput.value = '';
|
||||||
|
});
|
||||||
|
|
||||||
|
async function handleFiles(files) {
|
||||||
|
for (const file of files) {
|
||||||
|
await uploadFile(file);
|
||||||
|
}
|
||||||
|
loadPending();
|
||||||
|
}
|
||||||
|
|
||||||
|
function uploadFile(file) {
  return new Promise((resolve, reject) => {
    const formData = new FormData();
    formData.append('file', file);

    const xhr = new XMLHttpRequest();
    uploadProgress.style.display = 'block';

    xhr.upload.onprogress = e => {
      if (e.lengthComputable) {
        uploadBar.style.width = (e.loaded / e.total * 100) + '%';
      }
    };
    xhr.onload = () => {
      uploadProgress.style.display = 'none';
      uploadBar.style.width = '0';
      if (xhr.status === 200) {
        toast('הקובץ הועלה בהצלחה', 'success');
        resolve();
      } else {
        // Guard against non-JSON error bodies instead of throwing in onload
        let detail = 'שגיאה בהעלאה';
        try { detail = JSON.parse(xhr.responseText).detail || detail; } catch (_) {}
        toast(detail, 'error');
        reject();
      }
    };
    xhr.onerror = () => {
      uploadProgress.style.display = 'none';
      toast('שגיאת רשת', 'error');
      reject();
    };
    xhr.open('POST', API + '/upload');
    xhr.send(formData);
  });
}

// ── Pending Files ─────────────────────────────────────────────
let cases = [];

async function loadCases() {
  try {
    const res = await fetch(API + '/cases');
    cases = await res.json();
  } catch (e) {
    cases = [];
  }
}

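// Render each pending upload with its classification form: a category
// radio (training decision vs. case document) plus the matching field set.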
async function loadPending() {
  const res = await fetch(API + '/uploads');
  const files = await res.json();
  const card = document.getElementById('pendingCard');
  const list = document.getElementById('pendingList');

  if (!files.length) {
    card.style.display = 'none';
    return;
  }

  card.style.display = 'block';
  await loadCases();

  list.innerHTML = files.map(f => `
    <div class="pending-file" data-filename="${esc(f.filename)}">
      <div class="file-info">
        <span class="file-name">${esc(f.filename.replace(/^\d+_/, ''))}</span>
        <div class="file-actions">
          <span class="file-size">${formatSize(f.size)}</span>
          <button class="btn btn-danger btn-sm" onclick="deleteFile('${esc(f.filename)}')">מחק</button>
        </div>
      </div>
      <div class="radio-group">
        <label><input type="radio" name="cat_${esc(f.filename)}" value="training" onchange="showFields(this)"> החלטה קודמת (אימון)</label>
        <label><input type="radio" name="cat_${esc(f.filename)}" value="case" onchange="showFields(this)"> מסמך תיק</label>
      </div>
      <div class="conditional case-fields" id="case_${esc(f.filename)}">
        <div class="form-row">
          <div class="form-group">
            <label>תיק</label>
            <select class="case-select">
              <option value="">בחר תיק...</option>
              ${cases.map(c => `<option value="${esc(c.case_number)}">${esc(c.case_number)} — ${esc(c.title)}</option>`).join('')}
            </select>
          </div>
          <div class="form-group">
            <label>סוג מסמך</label>
            <select class="doctype-select">
              <option value="appeal">כתב ערר</option>
              <option value="response">תשובה</option>
              <option value="decision">החלטה</option>
              <option value="exhibit">נספח</option>
              <option value="reference">מסמך עזר</option>
            </select>
          </div>
        </div>
      </div>
      <div class="conditional training-fields" id="train_${esc(f.filename)}">
        <div class="form-row">
          <div class="form-group">
            <label>מספר החלטה</label>
            <input type="text" class="decision-number" placeholder="לדוגמה: 1180+1181">
          </div>
          <div class="form-group">
            <label>תאריך החלטה</label>
            <input type="date" class="decision-date">
          </div>
        </div>
        <div class="form-group" style="margin-top:8px">
          <label>קטגוריות</label>
          <div class="subject-grid">
            ${['בנייה','שימוש חורג','תכנית','היתר','הקלה','חלוקה','תמ"א 38','היטל השבחה','פיצויים 197'].map(s =>
              `<label><input type="checkbox" value="${s}"> ${s}</label>`
            ).join('')}
          </div>
        </div>
      </div>
      <div style="margin-top:12px">
        <div class="form-group" style="max-width:300px;margin-bottom:8px">
          <label>כותרת (אופציונלי)</label>
          <input type="text" class="doc-title" placeholder="שם המסמך">
        </div>
        <button class="btn btn-primary process-btn" onclick="classifyFile('${esc(f.filename)}')" disabled>עבד</button>
      </div>
    </div>
  `).join('');
}

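// Reveal the field set matching the chosen category and enable "process".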
function showFields(radio) {
  const container = radio.closest('.pending-file');
  const val = radio.value;

  container.querySelector('.case-fields').classList.toggle('active', val === 'case');
  container.querySelector('.training-fields').classList.toggle('active', val === 'training');
  container.querySelector('.process-btn').disabled = false;
}

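// Remove an uploaded file from the server before it is classified.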
async function deleteFile(filename) {
  await fetch(API + '/uploads/' + encodeURIComponent(filename), { method: 'DELETE' });
  loadPending();
  toast('הקובץ נמחק');
}

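// Build the classification payload (case fields or training fields,
// depending on the chosen category) and POST it to /classify; on success
// the returned task_id is handed to the SSE progress tracker.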
async function classifyFile(filename) {
  const container = document.querySelector(`.pending-file[data-filename="${filename}"]`);
  const category = container.querySelector('input[type="radio"]:checked')?.value;
  if (!category) return toast('יש לבחור סיווג', 'error');

  const body = {
    filename,
    category,
    title: container.querySelector('.doc-title').value,
  };

  if (category === 'case') {
    body.case_number = container.querySelector('.case-select').value;
    body.doc_type = container.querySelector('.doctype-select').value;
    if (!body.case_number) return toast('יש לבחור תיק', 'error');
  } else {
    body.decision_number = container.querySelector('.decision-number').value;
    body.decision_date = container.querySelector('.decision-date').value;
    body.subject_categories = Array.from(container.querySelectorAll('.subject-grid input:checked')).map(cb => cb.value);
  }

  // Disable the button while the request is in flight
  const btn = container.querySelector('.process-btn');
  btn.disabled = true;
  btn.textContent = 'מעבד...';

  try {
    const res = await fetch(API + '/classify', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!res.ok) {
      const err = await res.json();
      toast(err.detail || 'שגיאה', 'error');
      btn.disabled = false;
      btn.textContent = 'עבד';
      return;
    }
    const data = await res.json();
    trackTask(data.task_id, filename.replace(/^\d+_/, ''));
    // Remove from pending after a short delay
    setTimeout(loadPending, 500);
  } catch (e) {
    toast('שגיאת רשת', 'error');
    // Re-enable the button so a network error does not leave the UI stuck
    btn.disabled = false;
    btn.textContent = 'עבד';
  }
}

// ── Task Tracking ─────────────────────────────────────────────
const activeTasks = new Map();

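// Stream progress for a processing task over Server-Sent Events, mapping
// status/step codes from the server to Hebrew labels on a per-task card.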
function trackTask(taskId, displayName) {
  const card = document.getElementById('tasksCard');
  const list = document.getElementById('tasksList');
  card.style.display = 'block';

  const STATUS_LABELS = {
    queued: 'בתור...',
    validating: 'מאמת...',
    copying: 'מעתיק...',
    registering: 'רושם...',
    processing: 'מעבד...',
    completed: 'הושלם',
    failed: 'נכשל',
  };

  const STEP_LABELS = {
    extracting: 'מחלץ טקסט',
    corpus: 'מוסיף לקורפוס',
    chunking: 'מפצל לקטעים',
    embedding: 'יוצר embeddings',
  };

  const div = document.createElement('div');
  div.className = 'task-item';
  div.id = 'task_' + taskId;
  div.innerHTML = `
    <div class="spinner"></div>
    <div class="task-info">
      <div class="task-name">${esc(displayName)}</div>
      <div class="task-status">בתור...</div>
    </div>
  `;
  list.prepend(div);

  const es = new EventSource(API + '/progress/' + taskId);
  es.onmessage = e => {
    const data = JSON.parse(e.data);
    const statusEl = div.querySelector('.task-status');
    let label = STATUS_LABELS[data.status] || data.status;
    if (data.step) label += ' — ' + (STEP_LABELS[data.step] || data.step);
    statusEl.textContent = label;

    if (data.status === 'completed') {
      div.classList.add('done');
      es.close();
      const r = data.result || {};
      if (r.chunks !== undefined) {
        statusEl.textContent = `הושלם — ${r.chunks} קטעים, ${r.pages || '?'} עמודים`;
      }
      // toast() assigns via textContent, so HTML-escaping displayName
      // here would double-escape it; pass it as plain text.
      toast('העיבוד הושלם: ' + displayName, 'success');
    } else if (data.status === 'failed') {
      div.classList.add('failed');
      es.close();
      statusEl.textContent = 'נכשל: ' + (data.error || 'שגיאה לא ידועה');
      toast('העיבוד נכשל', 'error');
    }
  };
  es.onerror = () => { es.close(); };
}

// ── Helpers ───────────────────────────────────────────────────
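// HTML-escape a string (&, <, >) by round-tripping through a detached div.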
function esc(s) {
  const d = document.createElement('div');
  d.textContent = s;
  return d.innerHTML;
}

function formatSize(bytes) {
  if (bytes < 1024) return bytes + ' B';
  if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB';
  return (bytes / (1024 * 1024)).toFixed(1) + ' MB';
}

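// Show a transient notification; the optional type ('success'/'error')
// picks the toast color, and the toast hides itself after 3 seconds.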
function toast(msg, type = '') {
  const el = document.getElementById('toast');
  el.textContent = msg;
  el.className = 'toast show ' + type;
  setTimeout(() => el.className = 'toast', 3000);
}

// Initial load
loadPending();
</script>
</body>
</html>