Initial commit: din-leumi MCP server + web app
MCP server with 7 tools for cataloging and searching National Insurance court decisions with pgvector semantic search. Web interface for upload, search, and browse.
This commit is contained in:
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.venv/
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
data/uploads/
|
||||||
|
data/decisions/
|
||||||
|
.env
|
||||||
14
.mcp.json
Normal file
14
.mcp.json
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"din-leumi": {
|
||||||
|
"type": "stdio",
|
||||||
|
"command": "/home/chaim/din-leumi/mcp-server/.venv/bin/python",
|
||||||
|
"args": ["-m", "din_leumi.server"],
|
||||||
|
"cwd": "/home/chaim/din-leumi/mcp-server",
|
||||||
|
"env": {
|
||||||
|
"DOTENV_PATH": "/home/chaim/.env",
|
||||||
|
"DIN_LEUMI_DATA_DIR": "/home/chaim/din-leumi/data"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
32
CLAUDE.md
Normal file
32
CLAUDE.md
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# דין לאומי (Din Leumi)
|
||||||
|
|
||||||
|
מערכת לקטלוג וחיפוש סמנטי של פסקי דין בתחום ביטוח לאומי.
|
||||||
|
|
||||||
|
## כלי MCP זמינים
|
||||||
|
|
||||||
|
### ניהול פסקי דין
|
||||||
|
- `decision_upload` - העלאה ועיבוד פסק דין (PDF/DOCX/RTF/TXT) עם מטאדאטא
|
||||||
|
- `decision_get` - פרטי פסק דין מלאים כולל טקסט מחולץ
|
||||||
|
- `decision_list` - רשימת פסקי דין עם סינון (בית משפט, נושא, שופט, תאריכים, תוצאה)
|
||||||
|
- `decision_update` - עדכון מטאדאטא של פסק דין
|
||||||
|
- `decision_delete` - מחיקת פסק דין
|
||||||
|
|
||||||
|
### חיפוש
|
||||||
|
- `decision_search` - חיפוש סמנטי עם סינון מטאדאטא
|
||||||
|
|
||||||
|
### מערכת
|
||||||
|
- `system_status` - סטטיסטיקות מערכת
|
||||||
|
|
||||||
|
## נושאים נפוצים
|
||||||
|
נכות כללית, נכות מעבודה, תאונת עבודה, דמי לידה, דמי אבטלה, גמלת הבטחת הכנסה, גמלת ניידות, גמלת סיעוד, קצבת זקנה, קצבת שאירים
|
||||||
|
|
||||||
|
## תוצאות אפשריות
|
||||||
|
- `accepted` - התקבלה
|
||||||
|
- `rejected` - נדחתה
|
||||||
|
- `partial` - התקבלה חלקית
|
||||||
|
- `remanded` - הוחזרה לדיון מחדש
|
||||||
|
|
||||||
|
## תהליך עבודה
|
||||||
|
1. העלאת פסק דין: `decision_upload` עם נתיב לקובץ + מטאדאטא
|
||||||
|
2. חיפוש תקדימים: `decision_search` עם שאילתה + סינון אופציונלי
|
||||||
|
3. צפייה בפסק דין: `decision_get` לקריאת הטקסט המלא
|
||||||
26
Dockerfile
Normal file
26
Dockerfile
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# System deps for PyMuPDF and document processing
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
gcc libmupdf-dev libfreetype6-dev libharfbuzz-dev libjpeg62-turbo-dev \
|
||||||
|
libopenjp2-7-dev curl && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Copy MCP server source (for importing services)
|
||||||
|
COPY mcp-server/pyproject.toml /app/mcp-server/pyproject.toml
|
||||||
|
COPY mcp-server/src/ /app/mcp-server/src/
|
||||||
|
|
||||||
|
# Install MCP server dependencies + web deps
|
||||||
|
RUN pip install --no-cache-dir /app/mcp-server && \
|
||||||
|
pip install --no-cache-dir fastapi uvicorn python-multipart
|
||||||
|
|
||||||
|
# Copy web app
|
||||||
|
COPY web/ /app/web/
|
||||||
|
|
||||||
|
ENV PYTHONPATH=/app/mcp-server/src
|
||||||
|
ENV DOTENV_PATH=/home/chaim/.env
|
||||||
|
|
||||||
|
EXPOSE 8081
|
||||||
|
|
||||||
|
CMD ["uvicorn", "web.app:app", "--host", "0.0.0.0", "--port", "8081"]
|
||||||
25
mcp-server/pyproject.toml
Normal file
25
mcp-server/pyproject.toml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
[project]
|
||||||
|
name = "din-leumi"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "MCP server for cataloging and searching National Insurance court decisions"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"mcp[cli]>=1.0.0",
|
||||||
|
"asyncpg>=0.29.0",
|
||||||
|
"pgvector>=0.3.0",
|
||||||
|
"voyageai>=0.3.0",
|
||||||
|
"anthropic>=0.40.0",
|
||||||
|
"python-dotenv>=1.0.0",
|
||||||
|
"pydantic>=2.0.0",
|
||||||
|
"pymupdf>=1.25.0",
|
||||||
|
"python-docx>=1.1.0",
|
||||||
|
"striprtf>=0.0.26",
|
||||||
|
"pillow>=10.0.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=68.0"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
where = ["src"]
|
||||||
0
mcp-server/src/din_leumi/__init__.py
Normal file
0
mcp-server/src/din_leumi/__init__.py
Normal file
4
mcp-server/src/din_leumi/__main__.py
Normal file
4
mcp-server/src/din_leumi/__main__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
"""Allow running with: python -m din_leumi"""
|
||||||
|
from din_leumi.server import main
|
||||||
|
|
||||||
|
main()
|
||||||
36
mcp-server/src/din_leumi/config.py
Normal file
36
mcp-server/src/din_leumi/config.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
"""Configuration loaded from central .env file."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Load the central .env file; DOTENV_PATH overrides the default ~/.env.
dotenv_path = os.getenv("DOTENV_PATH", str(Path.home() / ".env"))
load_dotenv(dotenv_path)

# PostgreSQL - shared server, separate database.
# The URL is assembled from individual settings and is only used when
# DIN_LEUMI_POSTGRES_URL is not set.
_pg_user = os.getenv("POSTGRES_USER", "legal_ai")
_pg_password = os.getenv("POSTGRES_PASSWORD", "")
_pg_host = os.getenv("POSTGRES_HOST", "127.0.0.1")
_pg_port = os.getenv("POSTGRES_PORT", "5433")
_pg_database = os.getenv("DIN_LEUMI_POSTGRES_DB", "din_leumi")
POSTGRES_URL = os.getenv(
    "DIN_LEUMI_POSTGRES_URL",
    f"postgres://{_pg_user}:{_pg_password}@{_pg_host}:{_pg_port}/{_pg_database}",
)

# Voyage AI
VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY", "")
VOYAGE_MODEL = "voyage-3-large"
VOYAGE_DIMENSIONS = 1024

# Anthropic (for Claude Vision OCR)
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Data directory; decisions live in a sub-directory of it.
_default_data_dir = Path.home() / "din-leumi" / "data"
DATA_DIR = Path(os.getenv("DIN_LEUMI_DATA_DIR", str(_default_data_dir)))
DECISIONS_DIR = DATA_DIR / "decisions"

# Chunking parameters (in estimated tokens)
CHUNK_SIZE_TOKENS = 600
CHUNK_OVERLAP_TOKENS = 100
|
||||||
156
mcp-server/src/din_leumi/server.py
Normal file
156
mcp-server/src/din_leumi/server.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
"""Din Leumi - MCP Server entry point.
|
||||||
|
|
||||||
|
Run with: python -m din_leumi.server
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
|
||||||
|
# Configure logging to stderr (stdout is reserved for JSON-RPC)
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||||
|
stream=sys.stderr,
|
||||||
|
)
|
||||||
|
logger = logging.getLogger("din_leumi")
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(server: FastMCP) -> AsyncIterator[None]:
    """Initialize the DB schema on startup and close the pool on shutdown.

    Args:
        server: The FastMCP instance this lifespan is attached to (not used
            by the body).

    Yields:
        None, once the schema has been initialized.

    Raises:
        Whatever ``init_schema`` raises; the pool is closed before the
        exception propagates.
    """
    # Deferred import: the DB service is only loaded when the server starts.
    from din_leumi.services.db import close_pool, init_schema

    try:
        # Schema initialization sits inside the ``try`` because it opens the
        # connection pool; if it fails afterwards, the ``finally`` clause
        # still closes the pool instead of leaking its connections.
        logger.info("Initializing database schema...")
        await init_schema()
        logger.info("Din Leumi MCP server ready")
        yield
    finally:
        await close_pool()
        logger.info("Din Leumi MCP server stopped")
|
||||||
|
|
||||||
|
|
||||||
|
# Create MCP server
|
||||||
|
mcp = FastMCP(
|
||||||
|
"Din Leumi - דין לאומי",
|
||||||
|
instructions="מערכת לקטלוג וחיפוש סמנטי של פסקי דין בתחום ביטוח לאומי",
|
||||||
|
lifespan=lifespan,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── Import tool modules ────────────────────────────────────────────
|
||||||
|
|
||||||
|
from din_leumi.tools import decisions, search # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
# ── Decision management ────────────────────────────────────────────
|
||||||
|
|
||||||
|
@mcp.tool()
async def decision_upload(
    file_path: str,
    title: str = "",
    court: str = "",
    decision_date: str = "",
    case_number: str = "",
    judge: str = "",
    parties_appellant: str = "",
    parties_respondent: str = "המוסד לביטוח לאומי",
    topics: list[str] | None = None,
    outcome: str = "",
) -> str:
    """העלאה ועיבוד פסק דין של בית דין לעבודה בתחום ביטוח לאומי (PDF/DOCX/RTF/TXT).
    מחלץ טקסט, יוצר chunks ו-embeddings לחיפוש סמנטי.
    outcome: accepted/rejected/partial/remanded."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: upload a decision file plus metadata; the work is delegated
    # to tools.decisions with the arguments passed positionally.
    response = await decisions.decision_upload(
        file_path,
        title,
        court,
        decision_date,
        case_number,
        judge,
        parties_appellant,
        parties_respondent,
        topics,
        outcome,
    )
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def decision_search(
    query: str,
    limit: int = 10,
    court: str = "",
    topic: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
) -> str:
    """חיפוש סמנטי בפסקי דין של ביטוח לאומי.
    ניתן לסנן לפי בית משפט, נושא, טווח תאריכים ותוצאה.
    נושאים נפוצים: נכות כללית, נכות מעבודה, דמי לידה, תאונת עבודה, גמלת הבטחת הכנסה, דמי אבטלה."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: semantic search with optional metadata filters; delegated
    # to tools.search with the arguments passed positionally.
    response = await search.decision_search(
        query,
        limit,
        court,
        topic,
        date_from,
        date_to,
        outcome,
    )
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def decision_list(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 50,
) -> str:
    """רשימת פסקי דין לפי מטאדאטא. סינון אופציונלי לפי בית משפט, נושא, שופט, תאריכים, תוצאה."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: list decisions by metadata filters; delegated to
    # tools.decisions with the arguments passed positionally.
    response = await decisions.decision_list(
        court,
        topic,
        judge,
        date_from,
        date_to,
        outcome,
        limit,
    )
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def decision_get(
    decision_id: str = "",
    case_number: str = "",
) -> str:
    """פרטי פסק דין מלאים כולל טקסט מחולץ. חיפוש לפי decision_id או case_number."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: fetch one decision by id or by case number; delegated to
    # tools.decisions.
    details = await decisions.decision_get(decision_id, case_number)
    return details
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def decision_update(
    decision_id: str,
    title: str = "",
    court: str = "",
    decision_date: str = "",
    case_number: str = "",
    judge: str = "",
    parties_appellant: str = "",
    parties_respondent: str = "",
    topics: list[str] | None = None,
    outcome: str = "",
    summary: str = "",
) -> str:
    """עדכון מטאדאטא של פסק דין."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: update a decision's metadata; delegated to tools.decisions
    # with the arguments passed positionally.
    response = await decisions.decision_update(
        decision_id,
        title,
        court,
        decision_date,
        case_number,
        judge,
        parties_appellant,
        parties_respondent,
        topics,
        outcome,
        summary,
    )
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def decision_delete(decision_id: str) -> str:
    """מחיקת פסק דין וכל ה-chunks שלו מהמערכת."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: delete a decision; delegated to tools.decisions.
    response = await decisions.decision_delete(decision_id)
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def system_status() -> str:
    """סטטוס מערכת דין לאומי - מספר פסקי דין, chunks, סטטיסטיקות."""
    # NOTE(review): the Hebrew docstring is kept verbatim because FastMCP
    # presumably exposes it as the tool description - confirm before editing.
    # MCP tool: report system statistics; delegated to tools.search.
    report = await search.system_status()
    return report
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the MCP server over the stdio transport."""
    # stdout carries the JSON-RPC stream, which is why logging in this
    # module is configured to write to stderr.
    mcp.run(transport="stdio")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
0
mcp-server/src/din_leumi/services/__init__.py
Normal file
0
mcp-server/src/din_leumi/services/__init__.py
Normal file
132
mcp-server/src/din_leumi/services/chunker.py
Normal file
132
mcp-server/src/din_leumi/services/chunker.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
"""Legal document chunker - splits text into sections and chunks for RAG.
|
||||||
|
|
||||||
|
Adapted for National Insurance (Bituach Leumi) court decisions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from din_leumi import config
|
||||||
|
|
||||||
|
# Hebrew legal section headers for NI court decisions
|
||||||
|
SECTION_PATTERNS = [
|
||||||
|
(r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע|עובדות\s*המקרה", "facts"),
|
||||||
|
(r"טענות\s*התובע[ת]?|טענות\s*המבוטח[ת]?|טענות\s*המערער[ת]?|עיקר\s*טענות\s*התובע", "claimant_claims"),
|
||||||
|
(r"טענות\s*הנתבע[ת]?|טענות\s*המוסד|עיקר\s*טענות\s*הנתבע|תשובת\s*המוסד", "respondent_claims"),
|
||||||
|
(r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|הכרעת\s*הדין", "legal_analysis"),
|
||||||
|
(r"מסקנ[הות]|סיכום", "conclusion"),
|
||||||
|
(r"סוף\s*דבר|לסיכום|התוצאה|אשר\s*על\s*כן", "ruling"),
|
||||||
|
(r"מבוא|פתיחה|לפניי|לפני[נו]?", "intro"),
|
||||||
|
(r"הדין\s*החל|המסגרת\s*הנורמטיבית|הוראות\s*החוק", "legal_framework"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Chunk:
    """A single piece of decision text produced by the chunker."""

    # Text of the chunk.
    content: str
    # Section label assigned by the splitter; "other" when none is given.
    section_type: str = "other"
    # Source page, when known.
    page_number: int | None = None
    # Position of the chunk within the whole document.
    chunk_index: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
def chunk_document(
    text: str,
    chunk_size: int = config.CHUNK_SIZE_TOKENS,
    overlap: int = config.CHUNK_OVERLAP_TOKENS,
) -> list[Chunk]:
    """Break a legal document into section-aware chunks.

    Args:
        text: Full document text.
        chunk_size: Target chunk size, in estimated tokens.
        overlap: Overlap carried between consecutive chunks of a section,
            in estimated tokens.

    Returns:
        Chunks in document order with ``chunk_index`` numbered from 0
        across all sections; empty for blank input.
    """
    if text.strip() == "":
        return []

    # Flatten (section label, chunk text) pairs first so the running index
    # spans the whole document rather than restarting per section.
    labelled_pieces = [
        (label, piece)
        for label, section_body in _split_into_sections(text)
        for piece in _split_section(section_body, chunk_size, overlap)
    ]
    return [
        Chunk(content=piece, section_type=label, chunk_index=position)
        for position, (label, piece) in enumerate(labelled_pieces)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Split text into (section_type, text) pairs based on Hebrew headers.

    Every pattern in ``SECTION_PATTERNS`` is searched across the whole text
    and each hit starts a new section that runs until the next hit.

    Args:
        text: Full document text.

    Returns:
        Sections in document order. Text before the first header is labelled
        ``"intro"``; when no header is found at all the whole text is
        returned as a single ``"other"`` section.
    """
    # Collect (start, end, section_type) for every header hit.
    hits: list[tuple[int, int, str]] = []
    for pattern, section_type in SECTION_PATTERNS:
        for match in re.finditer(pattern, text):
            hits.append((match.start(), match.end(), section_type))

    if not hits:
        return [("other", text)]

    # Earliest hit first; on equal starts prefer the longer match.
    hits.sort(key=lambda hit: (hit[0], -hit[1]))

    # Different patterns can hit overlapping text, e.g. "לסיכום" (ruling)
    # also contains "סיכום" (conclusion) one character later. Keeping both
    # would produce a one-character "ruling" section and mislabel the rest,
    # so a hit that starts inside an already accepted header is dropped.
    markers: list[tuple[int, str]] = []
    covered_until = -1
    for start, end, section_type in hits:
        if start < covered_until:
            continue
        markers.append((start, section_type))
        covered_until = end

    sections: list[tuple[str, str]] = []

    # Text before the first header.
    first_pos = markers[0][0]
    if first_pos > 0:
        intro_text = text[:first_pos].strip()
        if intro_text:
            sections.append(("intro", intro_text))

    # Each section runs from its header to the next header (or end of text).
    for i, (pos, section_type) in enumerate(markers):
        end = markers[i + 1][0] if i + 1 < len(markers) else len(text)
        section_text = text[pos:end].strip()
        if section_text:
            sections.append((section_type, section_text))

    return sections
|
||||||
|
|
||||||
|
|
||||||
|
def _split_section(text: str, chunk_size: int, overlap: int) -> list[str]:
    """Split a section into overlapping chunks by paragraphs.

    Paragraphs are the non-blank lines of ``text``. They are packed into
    chunks of at most ``chunk_size`` estimated tokens; when a chunk is
    closed, its trailing paragraphs (up to ``overlap`` estimated tokens)
    are repeated at the start of the next chunk. A paragraph that alone
    exceeds ``chunk_size`` is first broken into smaller pieces so that no
    chunk silently exceeds the requested size.

    Args:
        text: Section text.
        chunk_size: Maximum chunk size in estimated tokens.
        overlap: Overlap between consecutive chunks in estimated tokens.

    Returns:
        Chunk texts in order; paragraphs inside a chunk are joined by
        newlines. Empty for blank input.
    """
    if not text.strip():
        return []

    paragraphs: list[str] = []
    for line in text.split("\n"):
        line = line.strip()
        if line:
            paragraphs.extend(_split_oversized_paragraph(line, chunk_size))

    chunks: list[str] = []
    current: list[str] = []
    current_tokens = 0

    for para in paragraphs:
        para_tokens = _estimate_tokens(para)

        if current_tokens + para_tokens > chunk_size and current:
            chunks.append("\n".join(current))
            # Carry the tail of the finished chunk over as overlap, taking
            # whole paragraphs from the end while they fit the budget.
            overlap_paras: list[str] = []
            overlap_tokens = 0
            for p in reversed(current):
                pt = _estimate_tokens(p)
                if overlap_tokens + pt > overlap:
                    break
                overlap_paras.insert(0, p)
                overlap_tokens += pt
            current = overlap_paras
            current_tokens = overlap_tokens

        current.append(para)
        current_tokens += para_tokens

    if current:
        chunks.append("\n".join(current))

    return chunks


def _split_oversized_paragraph(paragraph: str, chunk_size: int) -> list[str]:
    """Break a paragraph that exceeds ``chunk_size`` into word-aligned pieces.

    Paragraphs that already fit (or a non-positive ``chunk_size``) are
    returned unchanged as a single-element list. Runs of whitespace inside
    an oversized paragraph are collapsed to single spaces.
    """
    if chunk_size <= 0 or _estimate_tokens(paragraph) <= chunk_size:
        return [paragraph]

    # Inverse of _estimate_tokens (2 characters per token).
    max_chars = chunk_size * 2
    pieces: list[str] = []
    current = ""
    for word in paragraph.split():
        # A single "word" longer than the budget has to be cut mid-word.
        while len(word) > max_chars:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:max_chars])
            word = word[max_chars:]
        if not word:
            continue
        candidate = f"{current} {word}" if current else word
        if len(candidate) > max_chars:
            pieces.append(current)
            current = word
        else:
            current = candidate
    if current:
        pieces.append(current)
    return pieces


def _estimate_tokens(text: str) -> int:
    """Return a rough token estimate: 2 characters per token, minimum 1."""
    return max(1, len(text) // 2)
|
||||||
374
mcp-server/src/din_leumi/services/db.py
Normal file
374
mcp-server/src/din_leumi/services/db.py
Normal file
@@ -0,0 +1,374 @@
|
|||||||
|
"""Database service - asyncpg connection pool and queries for din-leumi."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from datetime import date
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
import asyncpg
|
||||||
|
from pgvector.asyncpg import register_vector
|
||||||
|
|
||||||
|
from din_leumi import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_pool: asyncpg.Pool | None = None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, creating it on first use.

    On the first call a short-lived bootstrap connection makes sure the
    ``vector`` and ``uuid-ossp`` extensions exist, then the pool is created
    with ``_init_connection`` as its per-connection initializer.

    Returns:
        The module-level connection pool.
    """
    global _pool
    if _pool is None:
        conn = await asyncpg.connect(config.POSTGRES_URL)
        try:
            # The extensions are created before the pool because every
            # pooled connection registers the vector type on init, and the
            # schema relies on uuid_generate_v4().
            await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
            await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
        finally:
            # Close the bootstrap connection even when an extension cannot
            # be created, so a failed startup does not leak it.
            await conn.close()

        _pool = await asyncpg.create_pool(
            config.POSTGRES_URL,
            min_size=2,
            max_size=10,
            init=_init_connection,
        )
    return _pool
|
||||||
|
|
||||||
|
|
||||||
|
async def _init_connection(conn: asyncpg.Connection) -> None:
    """Per-connection pool initializer: register the pgvector type codec."""
    await register_vector(conn)
|
||||||
|
|
||||||
|
|
||||||
|
async def close_pool() -> None:
    """Close the shared pool, if one was created, and forget it."""
    global _pool
    if not _pool:
        return
    await _pool.close()
    # Reset so a later get_pool() call builds a fresh pool.
    _pool = None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Schema ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
SCHEMA_SQL = """
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS decisions (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
file_path TEXT NOT NULL,
|
||||||
|
extracted_text TEXT DEFAULT '',
|
||||||
|
extraction_status TEXT DEFAULT 'pending',
|
||||||
|
|
||||||
|
court TEXT DEFAULT '',
|
||||||
|
decision_date DATE,
|
||||||
|
case_number TEXT DEFAULT '',
|
||||||
|
judge TEXT DEFAULT '',
|
||||||
|
parties_appellant TEXT DEFAULT '',
|
||||||
|
parties_respondent TEXT DEFAULT 'המוסד לביטוח לאומי',
|
||||||
|
topics JSONB DEFAULT '[]',
|
||||||
|
outcome TEXT DEFAULT '',
|
||||||
|
summary TEXT DEFAULT '',
|
||||||
|
|
||||||
|
page_count INTEGER,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT now(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT now()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS decision_chunks (
|
||||||
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
|
decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
|
||||||
|
chunk_index INTEGER NOT NULL,
|
||||||
|
content TEXT NOT NULL,
|
||||||
|
section_type TEXT DEFAULT 'other',
|
||||||
|
embedding vector(1024),
|
||||||
|
page_number INTEGER,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT now()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_chunks_decision ON decision_chunks(decision_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_decisions_court ON decisions(court);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_decisions_date ON decisions(decision_date);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_decisions_case_number ON decisions(case_number);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_decisions_topics ON decisions USING gin(topics);
|
||||||
|
"""
|
||||||
|
|
||||||
|
# IVFFlat index requires data to exist; create after first batch of decisions
|
||||||
|
IVFFLAT_INDEX_SQL = """
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_chunks_embedding
|
||||||
|
ON decision_chunks USING ivfflat (embedding vector_cosine_ops)
|
||||||
|
WITH (lists = 100);
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def init_schema() -> None:
    """Create tables and indexes, plus the IVFFlat index once data exists."""
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        await conn.execute(SCHEMA_SQL)
        # The IVFFlat index is only built once more than 100 chunks exist.
        chunk_total = await conn.fetchval("SELECT count(*) FROM decision_chunks")
        enough_data = bool(chunk_total) and chunk_total > 100
        if enough_data:
            await conn.execute(IVFFLAT_INDEX_SQL)
            logger.info("IVFFlat index created (%d chunks)", chunk_total)
    logger.info("Database schema initialized")
|
||||||
|
|
||||||
|
|
||||||
|
async def ensure_ivfflat_index() -> None:
    """Create the IVFFlat index when more than 100 chunks are stored."""
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        chunk_total = await conn.fetchval("SELECT count(*) FROM decision_chunks")
        if not chunk_total or chunk_total <= 100:
            return
        await conn.execute(IVFFLAT_INDEX_SQL)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Decision CRUD ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def create_decision(
    title: str,
    file_path: str,
    court: str = "",
    decision_date: date | None = None,
    case_number: str = "",
    judge: str = "",
    parties_appellant: str = "",
    parties_respondent: str = "המוסד לביטוח לאומי",
    topics: list[str] | None = None,
    outcome: str = "",
) -> dict:
    """Insert a new decision row and return it as a dict.

    A fresh UUID is generated client-side; ``topics`` is stored as a JSON
    array (an empty one when ``None`` or empty). The stored row is re-read
    through ``get_decision`` and returned.
    """
    new_id = uuid4()
    topics_json = json.dumps(topics or [])
    row_values = (
        new_id, title, file_path, court, decision_date,
        case_number, judge, parties_appellant, parties_respondent,
        topics_json, outcome,
    )

    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        await conn.execute(
            """INSERT INTO decisions (id, title, file_path, court, decision_date,
               case_number, judge, parties_appellant, parties_respondent,
               topics, outcome)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
            *row_values,
        )
    return await get_decision(new_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_decision(decision_id: UUID) -> dict | None:
    """Fetch one decision by primary key; ``None`` when it does not exist."""
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        record = await conn.fetchrow("SELECT * FROM decisions WHERE id = $1", decision_id)
    return None if record is None else _row_to_decision(record)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_decision_by_case_number(case_number: str) -> dict | None:
    """Fetch a decision with exactly this case number; ``None`` if absent.

    When several rows share the case number, whichever row the database
    returns first is used.
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        record = await conn.fetchrow(
            "SELECT * FROM decisions WHERE case_number = $1", case_number
        )
    return None if record is None else _row_to_decision(record)
|
||||||
|
|
||||||
|
|
||||||
|
async def list_decisions(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: date | None = None,
    date_to: date | None = None,
    outcome: str = "",
    limit: int = 50,
) -> list[dict]:
    """List decisions (without extracted text), newest decision date first.

    Falsy filter values are ignored. ``court`` and ``judge`` match as
    case-insensitive substrings, ``topic`` must be an element of the topics
    array, the dates bound ``decision_date`` inclusively and ``outcome``
    must match exactly.

    Returns:
        At most ``limit`` decision dicts.
    """
    filters: list[str] = []
    params: list = []

    def add_filter(template: str, value) -> None:
        # The placeholder number is the 1-based position of the value.
        params.append(value)
        filters.append(template.format(n=len(params)))

    if court:
        add_filter("court ILIKE ${n}", f"%{court}%")
    if topic:
        add_filter("topics @> ${n}::jsonb", json.dumps([topic]))
    if judge:
        add_filter("judge ILIKE ${n}", f"%{judge}%")
    if date_from:
        add_filter("decision_date >= ${n}", date_from)
    if date_to:
        add_filter("decision_date <= ${n}", date_to)
    if outcome:
        add_filter("outcome = ${n}", outcome)

    where = f"WHERE {' AND '.join(filters)}" if filters else ""
    params.append(limit)
    limit_slot = len(params)

    sql = f"""
        SELECT id, title, court, decision_date, case_number, judge,
               parties_appellant, parties_respondent, topics, outcome, summary,
               extraction_status, page_count, created_at
        FROM decisions
        {where}
        ORDER BY decision_date DESC NULLS LAST, created_at DESC
        LIMIT ${limit_slot}
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        records = await conn.fetch(sql, *params)
    return [_row_to_decision(record) for record in records]
|
||||||
|
|
||||||
|
|
||||||
|
async def update_decision(decision_id: UUID, **fields) -> dict | None:
    """Update the given columns of a decision and return the fresh row.

    Args:
        decision_id: Primary key of the decision to update.
        **fields: Column name -> new value. ``topics`` is serialized to
            JSON before being stored. With no fields the current row is
            returned unchanged.

    Returns:
        The decision after the update, or ``None`` when it does not exist.

    Raises:
        ValueError: If a field name is not a plain ASCII identifier.
    """
    if not fields:
        return await get_decision(decision_id)

    # Column names cannot be bound as query parameters and are interpolated
    # into the SQL text, so anything that is not a plain identifier is
    # rejected before the statement is built.
    invalid = [key for key in fields if not (key.isascii() and key.isidentifier())]
    if invalid:
        raise ValueError(
            f"Invalid column name(s): {', '.join(repr(key) for key in invalid)}"
        )

    pool = await get_pool()
    set_clauses = []
    values = []
    # $1 is reserved for the id, so column values start at $2.
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key == "topics":
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    set_clauses.append("updated_at = now()")
    sql = f"UPDATE decisions SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, decision_id, *values)
    return await get_decision(decision_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def delete_decision(decision_id: UUID) -> bool:
    """Delete a decision; return True when exactly one row was removed."""
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        status = await conn.execute("DELETE FROM decisions WHERE id = $1", decision_id)
    # The command status has the form "DELETE <row count>".
    return status == "DELETE 1"
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_decision(row: asyncpg.Record) -> dict:
    """Convert a decisions row into a plain dict.

    ``topics`` is decoded when it arrives as a JSON string, and a non-null
    ``id`` is converted to ``str``.
    """
    decision = dict(row)

    raw_topics = decision.get("topics")
    if isinstance(raw_topics, str):
        decision["topics"] = json.loads(raw_topics)

    if decision.get("id") is not None:
        decision["id"] = str(decision["id"])

    return decision
|
||||||
|
|
||||||
|
|
||||||
|
# ── Chunks & Vectors ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def store_chunks(
    decision_id: UUID,
    chunks: list[dict],
) -> int:
    """Replace all stored chunks of a decision with ``chunks``.

    Args:
        decision_id: Decision the chunks belong to.
        chunks: Dicts with ``chunk_index``, ``content`` and ``embedding``
            keys, and optional ``section_type`` (default ``"other"``) and
            ``page_number``.

    Returns:
        The number of chunks stored.

    Raises:
        KeyError: If a chunk lacks a required key. This is raised before
            the database is touched.
    """
    # Build every row up front so a malformed chunk is detected before the
    # existing chunks are deleted.
    rows = [
        (
            decision_id,
            chunk["chunk_index"],
            chunk["content"],
            chunk.get("section_type", "other"),
            chunk["embedding"],
            chunk.get("page_number"),
        )
        for chunk in chunks
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        # Delete and re-insert atomically: if an insert fails, the previous
        # chunks stay in place instead of leaving a partial set behind.
        async with conn.transaction():
            await conn.execute(
                "DELETE FROM decision_chunks WHERE decision_id = $1", decision_id
            )
            if rows:
                await conn.executemany(
                    """INSERT INTO decision_chunks
                       (decision_id, chunk_index, content, section_type, embedding, page_number)
                       VALUES ($1, $2, $3, $4, $5, $6)""",
                    rows,
                )
    return len(chunks)
|
||||||
|
|
||||||
|
|
||||||
|
async def search_similar(
    query_embedding: list[float],
    limit: int = 10,
    court: str = "",
    topic: str = "",
    date_from: date | None = None,
    date_to: date | None = None,
    outcome: str = "",
) -> list[dict]:
    """Cosine-similarity search over decision chunks with metadata filters.

    Falsy filter values are ignored. Each hit holds the chunk content,
    section type and page number, selected metadata of its decision, and
    ``score`` (1 minus the cosine distance to the query embedding).

    Returns:
        At most ``limit`` hits, closest first, with ``decision_id`` as a
        string.
    """
    filters: list[str] = []
    # $1 and $2 are fixed: the query embedding and the row limit.
    params: list = [query_embedding, limit]

    def add_filter(template: str, value) -> None:
        # The placeholder number is the 1-based position of the value.
        params.append(value)
        filters.append(template.format(n=len(params)))

    if court:
        add_filter("d.court ILIKE ${n}", f"%{court}%")
    if topic:
        add_filter("d.topics @> ${n}::jsonb", json.dumps([topic]))
    if date_from:
        add_filter("d.decision_date >= ${n}", date_from)
    if date_to:
        add_filter("d.decision_date <= ${n}", date_to)
    if outcome:
        add_filter("d.outcome = ${n}", outcome)

    where = f"WHERE {' AND '.join(filters)}" if filters else ""

    sql = f"""
        SELECT dc.content, dc.section_type, dc.page_number,
               dc.decision_id,
               d.title, d.case_number, d.court, d.decision_date,
               d.judge, d.outcome,
               1 - (dc.embedding <=> $1) AS score
        FROM decision_chunks dc
        JOIN decisions d ON d.id = dc.decision_id
        {where}
        ORDER BY dc.embedding <=> $1
        LIMIT $2
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        records = await conn.fetch(sql, *params)

    hits: list[dict] = []
    for record in records:
        hit = dict(record)
        if hit.get("decision_id"):
            hit["decision_id"] = str(hit["decision_id"])
        hits.append(hit)
    return hits
|
||||||
|
|
||||||
|
|
||||||
|
# ── Stats ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def get_stats() -> dict:
    """Collect summary statistics about stored decisions and chunks.

    Returns:
        Dict with total and completed decision counts, the total chunk
        count, the ten most frequent non-empty courts, and the earliest and
        latest decision dates as strings (``None`` when unavailable).
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as conn:
        decision_total = await conn.fetchval("SELECT count(*) FROM decisions")
        chunk_total = await conn.fetchval("SELECT count(*) FROM decision_chunks")
        completed_total = await conn.fetchval(
            "SELECT count(*) FROM decisions WHERE extraction_status = 'completed'"
        )
        court_rows = await conn.fetch(
            "SELECT court, count(*) as cnt FROM decisions WHERE court != '' GROUP BY court ORDER BY cnt DESC LIMIT 10"
        )
        span = await conn.fetchrow(
            "SELECT min(decision_date) as earliest, max(decision_date) as latest FROM decisions WHERE decision_date IS NOT NULL"
        )

    if span:
        date_range = {
            "earliest": str(span["earliest"]) if span["earliest"] else None,
            "latest": str(span["latest"]) if span["latest"] else None,
        }
    else:
        date_range = None

    return {
        "total_decisions": decision_total,
        "completed_decisions": completed_total,
        "total_chunks": chunk_total,
        "courts": [{"court": row["court"], "count": row["cnt"]} for row in court_rows],
        "date_range": date_range,
    }
|
||||||
55
mcp-server/src/din_leumi/services/embeddings.py
Normal file
55
mcp-server/src/din_leumi/services/embeddings.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
"""Embedding service using Voyage AI API."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import voyageai
|
||||||
|
|
||||||
|
from din_leumi import config
|
||||||
|
|
||||||
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Lazily created Voyage AI client; stays None until _get_client() first runs.
_client: voyageai.Client | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_client() -> voyageai.Client:
    """Return the shared Voyage AI client, creating it on first use."""
    global _client
    if _client is not None:
        return _client
    # First call: build the client from the configured API key and cache it.
    _client = voyageai.Client(api_key=config.VOYAGE_API_KEY)
    return _client
|
||||||
|
|
||||||
|
|
||||||
|
async def embed_texts(texts: list[str], input_type: str = "document") -> list[list[float]]:
    """Embed a batch of texts using Voyage AI.

    The Voyage client used here is synchronous, so each request is run in a
    worker thread; calling it directly would block the event loop (and every
    other request served by it) for the duration of the HTTP round trip.

    Args:
        texts: Texts to embed. They are sent in batches of at most 128.
        input_type: "document" for indexing, "query" for search queries.

    Returns:
        One embedding vector per input text, in input order. An empty input
        yields an empty list without contacting the API.
    """
    # Local import keeps this block self-contained; the module does not
    # import asyncio at file level.
    import asyncio

    if not texts:
        return []

    client = _get_client()
    all_embeddings: list[list[float]] = []

    # Voyage AI supports up to 128 texts per batch
    batch_size = 128
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        result = await asyncio.to_thread(
            client.embed,
            batch,
            model=config.VOYAGE_MODEL,
            input_type=input_type,
        )
        all_embeddings.extend(result.embeddings)

    return all_embeddings
|
||||||
|
|
||||||
|
|
||||||
|
async def embed_query(query: str) -> list[float]:
    """Embed one search query and return its vector."""
    # embed_texts works on batches, so wrap the query in a one-element list.
    vectors = await embed_texts([query], input_type="query")
    query_vector = vectors[0]
    return query_vector
|
||||||
126
mcp-server/src/din_leumi/services/extractor.py
Normal file
126
mcp-server/src/din_leumi/services/extractor.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
"""Text extraction from PDF, DOCX, and RTF files.
|
||||||
|
|
||||||
|
Primary PDF extraction: Claude Vision API (for scanned documents).
|
||||||
|
Fallback: PyMuPDF direct text extraction (for born-digital PDFs).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import anthropic
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
from docx import Document as DocxDocument
|
||||||
|
from striprtf.striprtf import rtf_to_text
|
||||||
|
|
||||||
|
from din_leumi import config
|
||||||
|
|
||||||
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Lazily created Anthropic client; stays None until _get_anthropic() first runs.
_anthropic_client: anthropic.Anthropic | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_anthropic() -> anthropic.Anthropic:
    """Return the shared Anthropic client, creating it on first use."""
    global _anthropic_client
    if _anthropic_client is not None:
        return _anthropic_client
    # First call: build the client from the configured API key and cache it.
    _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
    return _anthropic_client
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_text(file_path: str) -> tuple[str, int]:
    """Extract the text of a document, dispatching on its file suffix.

    Args:
        file_path: Path to a .pdf, .docx, .rtf or .txt file (suffix matching
            is case-insensitive).

    Returns:
        Tuple of (extracted_text, page_count). page_count is 0 for non-PDF
        files.

    Raises:
        ValueError: If the suffix is not one of the supported types.
    """
    path = Path(file_path)
    suffix = path.suffix.lower()

    # PDFs are the only format with an async extractor and a real page count.
    if suffix == ".pdf":
        return await _extract_pdf(path)

    plain_readers = {
        ".docx": _extract_docx,
        ".rtf": _extract_rtf,
        ".txt": lambda p: p.read_text(encoding="utf-8"),
    }
    reader = plain_readers.get(suffix)
    if reader is None:
        raise ValueError(f"Unsupported file type: {suffix}")
    return reader(path), 0
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_pdf(path: Path) -> tuple[str, int]:
    """Extract text from a PDF, page by page.

    Each page is first read with PyMuPDF's direct text extraction. A page
    yielding 50 characters or fewer is treated as scanned: it is rendered to
    a PNG at 200 dpi and sent to Claude Vision for OCR.

    Args:
        path: Location of the PDF file.

    Returns:
        Tuple of (text, page_count), where text is the per-page texts joined
        by blank lines.
    """
    doc = fitz.open(str(path))
    try:
        page_count = len(doc)
        pages_text: list[str] = []

        for page_num in range(page_count):
            page = doc[page_num]
            # Try direct text extraction first
            text = page.get_text().strip()

            if len(text) > 50:
                # Sufficient text found - born-digital page
                pages_text.append(text)
                logger.debug("Page %d: direct text extraction (%d chars)", page_num + 1, len(text))
            else:
                # Likely scanned - use Claude Vision
                logger.info("Page %d: using Claude Vision OCR", page_num + 1)
                pix = page.get_pixmap(dpi=200)
                img_bytes = pix.tobytes("png")
                ocr_text = await _ocr_with_claude(img_bytes, page_num + 1)
                pages_text.append(ocr_text)
    finally:
        # Release the file handle even when extraction or OCR fails part-way.
        doc.close()

    return "\n\n".join(pages_text), page_count
|
||||||
|
|
||||||
|
|
||||||
|
async def _ocr_with_claude(image_bytes: bytes, page_num: int) -> str:
    """OCR a single page image using Claude Vision API.

    The Anthropic client used here is synchronous, so the request is run in
    a worker thread; calling it directly would block the event loop for the
    whole OCR round trip.

    Args:
        image_bytes: PNG-encoded page image.
        page_num: 1-based page number; accepted for the caller's bookkeeping
            and not sent to the API.

    Returns:
        The text of the first content block of the model's reply.
    """
    # Local import keeps this block self-contained; the module does not
    # import asyncio at file level.
    import asyncio

    client = _get_anthropic()
    b64_image = base64.b64encode(image_bytes).decode("utf-8")

    message = await asyncio.to_thread(
        client.messages.create,
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": b64_image,
                        },
                    },
                    {
                        "type": "text",
                        "text": (
                            "חלץ את כל הטקסט מהתמונה הזו. זהו מסמך משפטי בעברית. "
                            "שמור על מבנה הפסקאות המקורי. "
                            "החזר רק את הטקסט המחולץ, ללא הערות נוספות."
                        ),
                    },
                ],
            }
        ],
    )
    return message.content[0].text
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_docx(path: Path) -> str:
    """Return the non-blank paragraphs of a DOCX file, separated by blank lines."""
    document = DocxDocument(str(path))
    kept: list[str] = []
    for para in document.paragraphs:
        # Skip paragraphs that contain only whitespace.
        if para.text.strip():
            kept.append(para.text)
    return "\n\n".join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_rtf(path: Path) -> str:
    """Read an RTF file and return its plain-text content."""
    # Undecodable bytes are replaced rather than raising.
    with path.open(encoding="utf-8", errors="replace") as handle:
        raw_rtf = handle.read()
    return rtf_to_text(raw_rtf)
|
||||||
82
mcp-server/src/din_leumi/services/processor.py
Normal file
82
mcp-server/src/din_leumi/services/processor.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
"""Decision processing pipeline: extract → chunk → embed → store."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from din_leumi.services import chunker, db, embeddings, extractor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def process_decision(decision_id: UUID) -> dict:
    """Full processing pipeline for a decision.

    1. Extract text from file
    2. Split into chunks
    3. Generate embeddings
    4. Store chunks + embeddings in DB

    Args:
        decision_id: Identifier of an existing decision record.

    Returns:
        A summary dict. On success: status "completed" plus chunk count,
        page count and text length. On any processing error: status "failed"
        plus the error text (the error is logged, not re-raised).

    Raises:
        ValueError: If no decision with this id exists.
    """
    decision = await db.get_decision(decision_id)
    if not decision:
        raise ValueError(f"Decision {decision_id} not found")

    await db.update_decision(decision_id, extraction_status="processing")

    try:
        # Step 1: Extract text
        logger.info("Extracting text from %s", decision["file_path"])
        text, page_count = await extractor.extract_text(decision["file_path"])

        await db.update_decision(
            decision_id,
            extracted_text=text,
            page_count=page_count,
        )

        # Step 2: Chunk
        logger.info("Chunking decision (%d chars)", len(text))
        chunks = chunker.chunk_document(text)

        if not chunks:
            await db.update_decision(decision_id, extraction_status="completed")
            return {"status": "completed", "chunks": 0, "message": "No text to chunk"}

        # Step 3: Embed
        logger.info("Generating embeddings for %d chunks", len(chunks))
        texts = [c.content for c in chunks]
        embs = await embeddings.embed_texts(texts, input_type="document")

        # zip() below would silently drop trailing chunks on a count mismatch,
        # leaving a partially indexed decision marked as completed.
        if len(embs) != len(chunks):
            raise RuntimeError(
                f"Embedding count mismatch: got {len(embs)} embeddings for {len(chunks)} chunks"
            )

        # Step 4: Store
        chunk_dicts = [
            {
                "content": c.content,
                "section_type": c.section_type,
                "embedding": emb,
                "page_number": c.page_number,
                "chunk_index": c.chunk_index,
            }
            for c, emb in zip(chunks, embs)
        ]

        stored = await db.store_chunks(decision_id, chunk_dicts)
        await db.update_decision(decision_id, extraction_status="completed")

        # Try to create IVFFlat index if we have enough data
        await db.ensure_ivfflat_index()

        logger.info("Decision processed: %d chunks stored", stored)
        return {
            "status": "completed",
            "chunks": stored,
            "pages": page_count,
            "text_length": len(text),
        }

    except Exception as e:
        # Pipeline boundary: record the failure on the decision and report it
        # to the caller instead of propagating.
        logger.exception("Decision processing failed: %s", e)
        await db.update_decision(decision_id, extraction_status="failed")
        return {"status": "failed", "error": str(e)}
|
||||||
0
mcp-server/src/din_leumi/tools/__init__.py
Normal file
0
mcp-server/src/din_leumi/tools/__init__.py
Normal file
241
mcp-server/src/din_leumi/tools/decisions.py
Normal file
241
mcp-server/src/din_leumi/tools/decisions.py
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
"""Decision CRUD tool implementations."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from din_leumi import config
|
||||||
|
from din_leumi.services import db, processor
|
||||||
|
|
||||||
|
|
||||||
|
async def decision_upload(
|
||||||
|
file_path: str,
|
||||||
|
title: str = "",
|
||||||
|
court: str = "",
|
||||||
|
decision_date: str = "",
|
||||||
|
case_number: str = "",
|
||||||
|
judge: str = "",
|
||||||
|
parties_appellant: str = "",
|
||||||
|
parties_respondent: str = "המוסד לביטוח לאומי",
|
||||||
|
topics: list[str] | None = None,
|
||||||
|
outcome: str = "",
|
||||||
|
) -> str:
|
||||||
|
"""Upload and process a court decision."""
|
||||||
|
source = Path(file_path)
|
||||||
|
if not source.exists():
|
||||||
|
return f"❌ הקובץ לא נמצא: {file_path}"
|
||||||
|
|
||||||
|
ext = source.suffix.lower()
|
||||||
|
if ext not in (".pdf", ".docx", ".rtf", ".txt"):
|
||||||
|
return f"❌ סוג קובץ לא נתמך: {ext}"
|
||||||
|
|
||||||
|
# Copy to decisions directory
|
||||||
|
config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
dest = config.DECISIONS_DIR / source.name
|
||||||
|
if dest.exists():
|
||||||
|
# Add suffix to avoid overwrite
|
||||||
|
dest = config.DECISIONS_DIR / f"{source.stem}_{UUID.__class__.__name__[:8]}{ext}"
|
||||||
|
shutil.copy2(str(source), str(dest))
|
||||||
|
|
||||||
|
# Parse date
|
||||||
|
d_date = None
|
||||||
|
if decision_date:
|
||||||
|
try:
|
||||||
|
d_date = date.fromisoformat(decision_date)
|
||||||
|
except ValueError:
|
||||||
|
return f"❌ פורמט תאריך לא תקין: {decision_date}. נדרש YYYY-MM-DD"
|
||||||
|
|
||||||
|
# Create decision record
|
||||||
|
if not title:
|
||||||
|
title = source.stem
|
||||||
|
|
||||||
|
decision = await db.create_decision(
|
||||||
|
title=title,
|
||||||
|
file_path=str(dest),
|
||||||
|
court=court,
|
||||||
|
decision_date=d_date,
|
||||||
|
case_number=case_number,
|
||||||
|
judge=judge,
|
||||||
|
parties_appellant=parties_appellant,
|
||||||
|
parties_respondent=parties_respondent,
|
||||||
|
topics=topics,
|
||||||
|
outcome=outcome,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process (extract → chunk → embed → store)
|
||||||
|
result = await processor.process_decision(UUID(decision["id"]))
|
||||||
|
|
||||||
|
status_icon = "✅" if result["status"] == "completed" else "❌"
|
||||||
|
lines = [
|
||||||
|
f"{status_icon} פסק דין הועלה ועובד",
|
||||||
|
f" כותרת: {title}",
|
||||||
|
f" מזהה: {decision['id']}",
|
||||||
|
]
|
||||||
|
if case_number:
|
||||||
|
lines.append(f" מספר תיק: {case_number}")
|
||||||
|
if result.get("chunks"):
|
||||||
|
lines.append(f" chunks: {result['chunks']}")
|
||||||
|
if result.get("pages"):
|
||||||
|
lines.append(f" עמודים: {result['pages']}")
|
||||||
|
if result.get("error"):
|
||||||
|
lines.append(f" שגיאה: {result['error']}")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
async def decision_get(
    decision_id: str = "",
    case_number: str = "",
) -> str:
    """Get full decision details.

    Args:
        decision_id: Decision UUID as a string; takes precedence when given.
        case_number: Case number, used only when decision_id is empty.

    Returns:
        A human-readable (Hebrew) description of the decision, including up
        to 15000 characters of extracted text, or an error message when the
        arguments are missing/invalid or nothing is found.
    """
    if not decision_id and not case_number:
        return "❌ נדרש decision_id או case_number"

    if decision_id:
        # Report a malformed id as a message, like the other validation
        # failures, instead of letting ValueError escape the tool.
        try:
            parsed_id = UUID(decision_id)
        except ValueError:
            return f"❌ מזהה פסק דין לא תקין: {decision_id}"
        decision = await db.get_decision(parsed_id)
    else:
        decision = await db.get_decision_by_case_number(case_number)

    if not decision:
        return "❌ פסק דין לא נמצא"

    lines = [
        f"📄 {decision.get('title', '')}",
        f" מזהה: {decision['id']}",
    ]
    if decision.get("case_number"):
        lines.append(f" מספר תיק: {decision['case_number']}")
    if decision.get("court"):
        lines.append(f" בית משפט: {decision['court']}")
    if decision.get("decision_date"):
        lines.append(f" תאריך: {decision['decision_date']}")
    if decision.get("judge"):
        lines.append(f" שופט: {decision['judge']}")
    if decision.get("parties_appellant"):
        lines.append(f" תובע/מערער: {decision['parties_appellant']}")
    if decision.get("parties_respondent"):
        lines.append(f" נתבע/משיב: {decision['parties_respondent']}")
    if decision.get("topics"):
        topics = decision["topics"]
        # Topics may arrive as a JSON-encoded string rather than a list.
        if isinstance(topics, str):
            topics = json.loads(topics)
        if topics:
            lines.append(f" נושאים: {', '.join(topics)}")
    if decision.get("outcome"):
        lines.append(f" תוצאה: {decision['outcome']}")
    if decision.get("summary"):
        lines.append(f" תקציר: {decision['summary']}")

    lines.append(f" סטטוס: {decision.get('extraction_status', 'unknown')}")
    if decision.get("page_count"):
        lines.append(f" עמודים: {decision['page_count']}")

    # Include extracted text (truncated)
    text = decision.get("extracted_text", "")
    if text:
        lines.append("")
        lines.append("── טקסט מחולץ ──")
        if len(text) > 15000:
            lines.append(text[:15000])
            lines.append(f"\n... (נקטע, סה\"כ {len(text)} תווים)")
        else:
            lines.append(text)

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
async def decision_list(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 50,
) -> str:
    """List decisions with optional filters.

    Args:
        court: Court filter.
        topic: Topic filter.
        judge: Judge filter.
        date_from: Lower date bound in YYYY-MM-DD format, or "" for none.
        date_to: Upper date bound in YYYY-MM-DD format, or "" for none.
        outcome: Outcome filter.
        limit: Maximum number of decisions to list.

    Returns:
        A human-readable (Hebrew) listing, a "nothing found" message, or an
        error message when a date is malformed.
    """
    # Report malformed dates as a message, matching decision_upload, instead
    # of letting ValueError escape the tool.
    bounds = []
    for raw in (date_from, date_to):
        try:
            bounds.append(date.fromisoformat(raw) if raw else None)
        except ValueError:
            return f"❌ פורמט תאריך לא תקין: {raw}. נדרש YYYY-MM-DD"
    d_from, d_to = bounds

    decisions = await db.list_decisions(
        court=court, topic=topic, judge=judge,
        date_from=d_from, date_to=d_to,
        outcome=outcome, limit=limit,
    )

    if not decisions:
        return "לא נמצאו פסקי דין"

    lines = [f"נמצאו {len(decisions)} פסקי דין:\n"]
    for d in decisions:
        parts = [f"• {d.get('title', 'ללא כותרת')}"]
        if d.get("case_number"):
            parts.append(f" [{d['case_number']}]")
        if d.get("court"):
            parts.append(f" {d['court']}")
        if d.get("decision_date"):
            parts.append(f" {d['decision_date']}")
        if d.get("outcome"):
            parts.append(f" ({d['outcome']})")
        lines.append(" ".join(parts))
        lines.append(f" מזהה: {d['id']}")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
async def decision_update(
|
||||||
|
decision_id: str,
|
||||||
|
title: str = "",
|
||||||
|
court: str = "",
|
||||||
|
decision_date: str = "",
|
||||||
|
case_number: str = "",
|
||||||
|
judge: str = "",
|
||||||
|
parties_appellant: str = "",
|
||||||
|
parties_respondent: str = "",
|
||||||
|
topics: list[str] | None = None,
|
||||||
|
outcome: str = "",
|
||||||
|
summary: str = "",
|
||||||
|
) -> str:
|
||||||
|
"""Update decision metadata."""
|
||||||
|
fields = {}
|
||||||
|
if title:
|
||||||
|
fields["title"] = title
|
||||||
|
if court:
|
||||||
|
fields["court"] = court
|
||||||
|
if decision_date:
|
||||||
|
fields["decision_date"] = date.fromisoformat(decision_date)
|
||||||
|
if case_number:
|
||||||
|
fields["case_number"] = case_number
|
||||||
|
if judge:
|
||||||
|
fields["judge"] = judge
|
||||||
|
if parties_appellant:
|
||||||
|
fields["parties_appellant"] = parties_appellant
|
||||||
|
if parties_respondent:
|
||||||
|
fields["parties_respondent"] = parties_respondent
|
||||||
|
if topics is not None:
|
||||||
|
fields["topics"] = topics
|
||||||
|
if outcome:
|
||||||
|
fields["outcome"] = outcome
|
||||||
|
if summary:
|
||||||
|
fields["summary"] = summary
|
||||||
|
|
||||||
|
if not fields:
|
||||||
|
return "❌ לא צוינו שדות לעדכון"
|
||||||
|
|
||||||
|
result = await db.update_decision(UUID(decision_id), **fields)
|
||||||
|
if not result:
|
||||||
|
return "❌ פסק דין לא נמצא"
|
||||||
|
|
||||||
|
return f"✅ פסק דין {decision_id} עודכן ({', '.join(fields.keys())})"
|
||||||
|
|
||||||
|
|
||||||
|
async def decision_delete(decision_id: str) -> str:
    """Delete a decision and all its chunks.

    Args:
        decision_id: Decision UUID as a string.

    Returns:
        A human-readable (Hebrew) confirmation, or an error message when the
        id is malformed or no such decision exists.
    """
    # Report a malformed id as a message instead of letting ValueError escape.
    try:
        parsed_id = UUID(decision_id)
    except ValueError:
        return f"❌ מזהה פסק דין לא תקין: {decision_id}"

    deleted = await db.delete_decision(parsed_id)
    if deleted:
        return f"✅ פסק דין {decision_id} נמחק"
    return "❌ פסק דין לא נמצא"
|
||||||
97
mcp-server/src/din_leumi/tools/search.py
Normal file
97
mcp-server/src/din_leumi/tools/search.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
"""Search tool implementations."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
from din_leumi.services import db, embeddings
|
||||||
|
|
||||||
|
|
||||||
|
async def decision_search(
    query: str,
    limit: int = 10,
    court: str = "",
    topic: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
) -> str:
    """Semantic search across all decisions with optional metadata filters.

    Args:
        query: Free-text search query; must contain non-whitespace text.
        limit: Maximum number of results.
        court: Court filter.
        topic: Topic filter.
        date_from: Lower date bound in YYYY-MM-DD format, or "" for none.
        date_to: Upper date bound in YYYY-MM-DD format, or "" for none.
        outcome: Outcome filter.

    Returns:
        A human-readable (Hebrew) result listing with a snippet of up to 500
        characters per hit, a "no results" message, or an error message when
        the query is empty or a date is malformed.
    """
    if not query.strip():
        return "❌ נדרש טקסט לחיפוש"

    # Validate the dates before embedding, so a malformed filter is reported
    # as a message and does not cost an embedding API call first.
    bounds = []
    for raw in (date_from, date_to):
        try:
            bounds.append(date.fromisoformat(raw) if raw else None)
        except ValueError:
            return f"❌ פורמט תאריך לא תקין: {raw}. נדרש YYYY-MM-DD"
    d_from, d_to = bounds

    # Embed query
    query_emb = await embeddings.embed_query(query)

    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        court=court,
        topic=topic,
        date_from=d_from,
        date_to=d_to,
        outcome=outcome,
    )

    if not results:
        return "לא נמצאו תוצאות"

    lines = [f"🔍 נמצאו {len(results)} תוצאות עבור: \"{query}\"\n"]

    for i, r in enumerate(results, 1):
        score = r.get("score", 0)
        lines.append(f"── תוצאה {i} (ציון: {score:.3f}) ──")
        lines.append(f" פסק דין: {r.get('title', '')}")
        if r.get("case_number"):
            lines.append(f" מספר תיק: {r['case_number']}")
        if r.get("court"):
            lines.append(f" בית משפט: {r['court']}")
        if r.get("decision_date"):
            lines.append(f" תאריך: {r['decision_date']}")
        if r.get("judge"):
            lines.append(f" שופט: {r['judge']}")
        if r.get("outcome"):
            lines.append(f" תוצאה: {r['outcome']}")
        lines.append(f" סוג קטע: {r.get('section_type', 'other')}")
        lines.append(f" מזהה: {r.get('decision_id', '')}")
        lines.append("")

        # Show content snippet
        content = r.get("content", "")
        if len(content) > 500:
            content = content[:500] + "..."
        lines.append(f" {content}")
        lines.append("")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
async def system_status() -> str:
    """Render the database statistics as a human-readable (Hebrew) report."""
    stats = await db.get_stats()

    report = [
        "📊 סטטוס מערכת דין לאומי",
        "",
        f" סה\"כ פסקי דין: {stats['total_decisions']}",
        f" עובדו בהצלחה: {stats['completed_decisions']}",
        f" סה\"כ chunks: {stats['total_chunks']}",
    ]

    # Per-court breakdown, only when at least one court is recorded.
    court_counts = stats.get("courts")
    if court_counts:
        report += ["", " בתי משפט:"]
        report.extend(f" • {c['court']}: {c['count']}" for c in court_counts)

    # Date range line, only when an earliest date is known.
    date_span = stats.get("date_range")
    if date_span and date_span.get("earliest"):
        report.append("")
        report.append(f" טווח תאריכים: {date_span['earliest']} — {date_span['latest']}")

    return "\n".join(report)
|
||||||
289
web/app.py
Normal file
289
web/app.py
Normal file
@@ -0,0 +1,289 @@
|
|||||||
|
"""Din Leumi — Web interface for uploading and searching court decisions."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
# Allow importing din_leumi from the MCP server source
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))
|
||||||
|
|
||||||
|
from fastapi import FastAPI, File, HTTPException, UploadFile
|
||||||
|
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from din_leumi import config
|
||||||
|
from din_leumi.services import db, processor
|
||||||
|
|
||||||
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Staging directory for raw uploads, located under the configured data directory.
UPLOAD_DIR = config.DATA_DIR / "uploads"
# Lowercase file suffixes accepted by the upload endpoint.
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".rtf", ".txt"}
# Largest upload accepted, in bytes.
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB

# In-memory progress tracking, keyed by task id. Each value is a status dict
# that is serialised to JSON for the progress stream; contents do not survive
# a process restart.
_progress: dict[str, dict] = {}
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: prepare storage and schema, then release the pool.

    On startup, creates the upload and decisions directories if missing and
    initialises the database schema. On shutdown, closes the connection pool.
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True)
    await db.init_schema()
    try:
        yield
    finally:
        # Close the pool even if an exception is thrown into the generator
        # at the yield point.
        await db.close_pool()
|
||||||
|
|
||||||
|
|
||||||
|
# The FastAPI application; startup/shutdown work is delegated to lifespan().
app = FastAPI(title="Din Leumi — דין לאומי", lifespan=lifespan)

# Directory next to this file that holds the static front-end assets.
STATIC_DIR = Path(__file__).parent / "static"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Health ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.get("/health")
async def health():
    """Liveness probe: always reports an "ok" status."""
    payload = {"status": "ok"}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
# ── Pages ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.get("/")
async def index():
    """Serve the single-page front end from the static directory."""
    landing_page = STATIC_DIR / "index.html"
    return FileResponse(landing_page)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Upload API ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a file to the temporary uploads directory.

    The stored name is ``<unix timestamp>_<sanitised stem><suffix>``.

    Args:
        file: The uploaded file; its suffix must be in ALLOWED_EXTENSIONS.

    Returns:
        Dict with the stored filename, the original name and the size in bytes.

    Raises:
        HTTPException: 400 when the filename is missing, the suffix is not
            allowed, or the file exceeds MAX_FILE_SIZE.
    """
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}")

    # Keep only word characters, Hebrew letters, whitespace and . - ( ).
    safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem)
    if not safe_name:
        safe_name = "document"
    timestamp = int(time.time())
    filename = f"{timestamp}_{safe_name}{ext}"

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without buffering an arbitrarily large body in memory first.
    content = await file.read(MAX_FILE_SIZE + 1)
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"הקובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    dest = UPLOAD_DIR / filename
    dest.write_bytes(content)

    return {
        "filename": filename,
        "original_name": file.filename,
        "size": len(content),
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ── Decision API ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class DecisionCreateRequest(BaseModel):
    """Request body for creating a decision from a previously uploaded file."""

    # Name of the staged file inside the uploads directory.
    filename: str
    # Display title; the handler falls back to the file name when empty.
    title: str = ""
    court: str = ""
    # ISO date string (parsed with date.fromisoformat); empty means no date.
    decision_date: str = ""
    case_number: str = ""
    judge: str = ""
    parties_appellant: str = ""
    parties_respondent: str = "המוסד לביטוח לאומי"
    # Topic labels; an empty list is stored as "no topics" by the handler.
    topics: list[str] = []
    outcome: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# Strong references to in-flight processing tasks. The event loop keeps only
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_background_tasks: set[asyncio.Task] = set()


@app.post("/api/decisions")
async def create_decision(req: DecisionCreateRequest):
    """Create a decision record and start processing.

    Copies the staged upload into the decisions directory, creates the
    database record and schedules background processing.

    Args:
        req: Metadata plus the name of a file in the uploads directory.

    Returns:
        Dict with the progress ``task_id`` and the new ``decision_id``.

    Raises:
        HTTPException: 404 when the staged file does not exist directly
            inside the uploads directory; 400 when the date is malformed.
    """
    source = UPLOAD_DIR / req.filename
    if not source.exists() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "קובץ לא נמצא")

    # Parse the date before copying anything, so a bad date does not leave an
    # orphaned copy in the decisions directory.
    d_date = None
    if req.decision_date:
        try:
            d_date = date.fromisoformat(req.decision_date)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {req.decision_date}")

    # Copy to decisions directory, dropping the upload timestamp prefix.
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = config.DECISIONS_DIR / original_name
    if dest.exists():
        stem = dest.stem
        dest = config.DECISIONS_DIR / f"{stem}_{int(time.time())}{dest.suffix}"
    shutil.copy2(str(source), str(dest))

    title = req.title or original_name.rsplit(".", 1)[0]

    # Create DB record
    decision = await db.create_decision(
        title=title,
        file_path=str(dest),
        court=req.court,
        decision_date=d_date,
        case_number=req.case_number,
        judge=req.judge,
        parties_appellant=req.parties_appellant,
        parties_respondent=req.parties_respondent,
        topics=req.topics if req.topics else None,
        outcome=req.outcome,
    )

    task_id = str(uuid4())
    _progress[task_id] = {"status": "queued", "filename": req.filename}

    # Process in background; hold a reference until the task completes.
    task = asyncio.create_task(_process_decision(task_id, decision, source))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"task_id": task_id, "decision_id": decision["id"]}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/decisions")
async def list_decisions(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 50,
):
    """List decisions with optional filters.

    Args:
        court: Court filter.
        topic: Topic filter.
        judge: Judge filter.
        date_from: Lower date bound as an ISO date string, or "" for none.
        date_to: Upper date bound as an ISO date string, or "" for none.
        outcome: Outcome filter.
        limit: Maximum number of decisions to return.

    Returns:
        Whatever db.list_decisions returns for these filters.

    Raises:
        HTTPException: 400 when a date is malformed.
    """
    # Answer malformed dates with a 400, as create_decision does, instead of
    # an unhandled ValueError (HTTP 500).
    bounds = []
    for raw in (date_from, date_to):
        try:
            bounds.append(date.fromisoformat(raw) if raw else None)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {raw}")
    d_from, d_to = bounds

    decisions = await db.list_decisions(
        court=court, topic=topic, judge=judge,
        date_from=d_from, date_to=d_to,
        outcome=outcome, limit=limit,
    )
    return decisions
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/decisions/{decision_id}")
async def get_decision(decision_id: str):
    """Get a single decision.

    Args:
        decision_id: Decision UUID as a string.

    Raises:
        HTTPException: 400 when the id is not a valid UUID; 404 when no such
            decision exists.
    """
    # Answer a malformed id with a 400 instead of an unhandled ValueError.
    try:
        parsed_id = UUID(decision_id)
    except ValueError:
        raise HTTPException(400, f"מזהה פסק דין לא תקין: {decision_id}")

    decision = await db.get_decision(parsed_id)
    if not decision:
        raise HTTPException(404, "פסק דין לא נמצא")
    return decision
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/api/decisions/{decision_id}")
async def delete_decision(decision_id: str):
    """Delete a decision.

    Args:
        decision_id: Decision UUID as a string.

    Returns:
        Dict echoing the deleted id.

    Raises:
        HTTPException: 400 when the id is not a valid UUID; 404 when no such
            decision exists.
    """
    # Answer a malformed id with a 400 instead of an unhandled ValueError.
    try:
        parsed_id = UUID(decision_id)
    except ValueError:
        raise HTTPException(400, f"מזהה פסק דין לא תקין: {decision_id}")

    deleted = await db.delete_decision(parsed_id)
    if not deleted:
        raise HTTPException(404, "פסק דין לא נמצא")
    return {"deleted": decision_id}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Search API ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.get("/api/search")
async def search_decisions(
    q: str = "",
    court: str = "",
    topic: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 10,
):
    """Semantic search across decisions.

    Args:
        q: Free-text query; must contain non-whitespace text.
        court: Court filter.
        topic: Topic filter.
        date_from: Lower date bound as an ISO date string, or "" for none.
        date_to: Upper date bound as an ISO date string, or "" for none.
        outcome: Outcome filter.
        limit: Maximum number of results.

    Returns:
        The search results, with any ``decision_date`` converted to a string.

    Raises:
        HTTPException: 400 when the query is empty or a date is malformed.
    """
    if not q.strip():
        raise HTTPException(400, "נדרש טקסט לחיפוש")

    # Validate the dates before embedding, so a malformed filter yields a 400
    # and does not cost an embedding API call first.
    bounds = []
    for raw in (date_from, date_to):
        try:
            bounds.append(date.fromisoformat(raw) if raw else None)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {raw}")
    d_from, d_to = bounds

    from din_leumi.services import embeddings

    query_emb = await embeddings.embed_query(q)

    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        court=court,
        topic=topic,
        date_from=d_from,
        date_to=d_to,
        outcome=outcome,
    )

    # Serialize dates
    for r in results:
        if r.get("decision_date"):
            r["decision_date"] = str(r["decision_date"])

    return results
|
||||||
|
|
||||||
|
|
||||||
|
# ── Progress SSE ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
    """Stream the processing progress of a task as server-sent events.

    Emits the task's current status dict once per second until the status is
    "completed" or "failed"; the entry is dropped from the progress table 30
    seconds after that. Responds with 404 when the task id is unknown.
    """
    if task_id not in _progress:
        raise HTTPException(404, "Task not found")

    async def event_stream():
        finished = False
        while not finished:
            snapshot = _progress.get(task_id, {})
            yield f"data: {json.dumps(snapshot, ensure_ascii=False)}\n\n"
            finished = snapshot.get("status") in ("completed", "failed")
            if not finished:
                await asyncio.sleep(1)
        # Keep the final state around briefly before forgetting the task.
        await asyncio.sleep(30)
        _progress.pop(task_id, None)

    return StreamingResponse(event_stream(), media_type="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/stats")
async def stats():
    """Return the database statistics as the JSON response body."""
    summary = await db.get_stats()
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
# ── Background Processing ─────────────────────────────────────────
|
||||||
|
|
||||||
|
async def _process_decision(task_id: str, decision: dict, source: Path):
    """Run the processing pipeline for one decision and record its progress.

    The task's entry in ``_progress`` is set to ``processing`` first, then
    replaced with the processor's outcome on success or with a ``failed``
    record (carrying the error text) if anything raises.  The source file
    is deleted only after processing returns without raising.

    Args:
        task_id: Key under which progress is published in ``_progress``.
        decision: Decision record; its ``"id"`` must parse as a UUID.
        source: Path of the uploaded file backing this decision.
    """
    try:
        _progress[task_id] = dict(status="processing", filename=source.name)

        decision_id = decision["id"]
        outcome = await processor.process_decision(UUID(decision_id))
        source.unlink(missing_ok=True)

        # The processor may report its own status; default to "completed".
        _progress[task_id] = dict(
            status=outcome.get("status", "completed"),
            filename=source.name,
            decision_id=decision_id,
            result=outcome,
        )
    except Exception as exc:
        logger.exception("Processing failed for %s", source.name)
        _progress[task_id] = dict(
            status="failed",
            error=str(exc),
            filename=source.name,
        )
|
||||||
492
web/static/index.html
Normal file
492
web/static/index.html
Normal file
@@ -0,0 +1,492 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="he" dir="rtl">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>דין לאומי - קטלוג פסקי דין</title>
|
||||||
|
<style>
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body {
|
||||||
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
|
||||||
|
background: #f5f7fa; color: #1a1a2e; line-height: 1.6;
|
||||||
|
}
|
||||||
|
.container { max-width: 1100px; margin: 0 auto; padding: 20px; }
|
||||||
|
header {
|
||||||
|
background: linear-gradient(135deg, #1a365d, #2d5086);
|
||||||
|
color: white; padding: 24px 0; margin-bottom: 24px;
|
||||||
|
}
|
||||||
|
header h1 { font-size: 1.8em; }
|
||||||
|
header p { opacity: 0.8; margin-top: 4px; }
|
||||||
|
|
||||||
|
.tabs {
|
||||||
|
display: flex; gap: 0; margin-bottom: 24px;
|
||||||
|
border-bottom: 2px solid #e2e8f0;
|
||||||
|
}
|
||||||
|
.tab {
|
||||||
|
padding: 10px 24px; cursor: pointer; border: none;
|
||||||
|
background: none; font-size: 1em; color: #64748b;
|
||||||
|
border-bottom: 2px solid transparent; margin-bottom: -2px;
|
||||||
|
}
|
||||||
|
.tab.active { color: #1a365d; border-bottom-color: #1a365d; font-weight: 600; }
|
||||||
|
.tab:hover { color: #1a365d; }
|
||||||
|
|
||||||
|
.panel { display: none; }
|
||||||
|
.panel.active { display: block; }
|
||||||
|
|
||||||
|
/* Cards */
|
||||||
|
.card {
|
||||||
|
background: white; border-radius: 8px; padding: 20px;
|
||||||
|
box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 16px;
|
||||||
|
}
|
||||||
|
.card h3 { margin-bottom: 12px; color: #1a365d; }
|
||||||
|
|
||||||
|
/* Forms */
|
||||||
|
.form-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
|
||||||
|
.form-group { display: flex; flex-direction: column; gap: 4px; }
|
||||||
|
.form-group.full { grid-column: 1 / -1; }
|
||||||
|
label { font-size: 0.85em; font-weight: 600; color: #475569; }
|
||||||
|
input, select, textarea {
|
||||||
|
padding: 8px 12px; border: 1px solid #d1d5db; border-radius: 6px;
|
||||||
|
font-size: 0.95em; font-family: inherit;
|
||||||
|
}
|
||||||
|
input:focus, select:focus, textarea:focus {
|
||||||
|
outline: none; border-color: #2d5086; box-shadow: 0 0 0 2px rgba(45,80,134,0.15);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Buttons */
|
||||||
|
.btn {
|
||||||
|
padding: 10px 20px; border: none; border-radius: 6px;
|
||||||
|
font-size: 0.95em; cursor: pointer; font-weight: 600;
|
||||||
|
}
|
||||||
|
.btn-primary { background: #1a365d; color: white; }
|
||||||
|
.btn-primary:hover { background: #2d5086; }
|
||||||
|
.btn-danger { background: #dc2626; color: white; }
|
||||||
|
.btn-danger:hover { background: #b91c1c; }
|
||||||
|
.btn:disabled { opacity: 0.6; cursor: not-allowed; }
|
||||||
|
|
||||||
|
/* Upload area */
|
||||||
|
.upload-area {
|
||||||
|
border: 2px dashed #d1d5db; border-radius: 8px; padding: 40px;
|
||||||
|
text-align: center; cursor: pointer; transition: all 0.2s;
|
||||||
|
}
|
||||||
|
.upload-area:hover, .upload-area.dragover {
|
||||||
|
border-color: #2d5086; background: #f0f4ff;
|
||||||
|
}
|
||||||
|
.upload-area input { display: none; }
|
||||||
|
|
||||||
|
/* Results */
|
||||||
|
.result-item {
|
||||||
|
border: 1px solid #e2e8f0; border-radius: 6px; padding: 12px;
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
.result-item:hover { border-color: #2d5086; }
|
||||||
|
.result-score {
|
||||||
|
display: inline-block; background: #e0e7ff; color: #3730a3;
|
||||||
|
padding: 2px 8px; border-radius: 4px; font-size: 0.8em; font-weight: 600;
|
||||||
|
}
|
||||||
|
.result-meta { font-size: 0.85em; color: #64748b; margin: 4px 0; }
|
||||||
|
.result-content {
|
||||||
|
font-size: 0.9em; color: #374151; margin-top: 8px;
|
||||||
|
background: #f9fafb; padding: 8px; border-radius: 4px;
|
||||||
|
max-height: 150px; overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decision list */
|
||||||
|
.decision-row {
|
||||||
|
display: grid; grid-template-columns: 2fr 1.5fr 1fr 1fr 80px;
|
||||||
|
gap: 8px; padding: 10px 12px; align-items: center;
|
||||||
|
border-bottom: 1px solid #f1f5f9; font-size: 0.9em;
|
||||||
|
}
|
||||||
|
.decision-row:hover { background: #f8fafc; }
|
||||||
|
.decision-header { font-weight: 600; color: #475569; background: #f1f5f9; border-radius: 6px 6px 0 0; }
|
||||||
|
|
||||||
|
/* Tags */
|
||||||
|
.tag {
|
||||||
|
display: inline-block; background: #e0f2fe; color: #0369a1;
|
||||||
|
padding: 2px 8px; border-radius: 12px; font-size: 0.8em; margin: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Progress */
|
||||||
|
.progress-bar {
|
||||||
|
height: 4px; background: #e2e8f0; border-radius: 2px; overflow: hidden;
|
||||||
|
}
|
||||||
|
.progress-bar .fill {
|
||||||
|
height: 100%; background: #2d5086; transition: width 0.3s;
|
||||||
|
animation: pulse 1.5s infinite;
|
||||||
|
}
|
||||||
|
@keyframes pulse { 50% { opacity: 0.6; } }
|
||||||
|
|
||||||
|
.status-badge {
|
||||||
|
display: inline-block; padding: 2px 8px; border-radius: 4px;
|
||||||
|
font-size: 0.8em; font-weight: 600;
|
||||||
|
}
|
||||||
|
.status-completed { background: #d1fae5; color: #065f46; }
|
||||||
|
.status-processing { background: #fef3c7; color: #92400e; }
|
||||||
|
.status-failed { background: #fee2e2; color: #991b1b; }
|
||||||
|
|
||||||
|
.stats-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; margin-bottom: 16px; }
|
||||||
|
.stat-box {
|
||||||
|
background: white; border-radius: 8px; padding: 16px;
|
||||||
|
text-align: center; box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
.stat-number { font-size: 2em; font-weight: 700; color: #1a365d; }
|
||||||
|
.stat-label { font-size: 0.85em; color: #64748b; }
|
||||||
|
|
||||||
|
.empty-state { text-align: center; padding: 40px; color: #94a3b8; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<div class="container">
|
||||||
|
<h1>דין לאומי</h1>
|
||||||
|
<p>קטלוג וחיפוש סמנטי של פסקי דין בתחום ביטוח לאומי</p>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="container">
|
||||||
|
<!-- Stats -->
|
||||||
|
<div class="stats-grid" id="stats-grid">
|
||||||
|
<div class="stat-box"><div class="stat-number" id="stat-decisions">-</div><div class="stat-label">פסקי דין</div></div>
|
||||||
|
<div class="stat-box"><div class="stat-number" id="stat-chunks">-</div><div class="stat-label">chunks</div></div>
|
||||||
|
<div class="stat-box"><div class="stat-number" id="stat-completed">-</div><div class="stat-label">עובדו בהצלחה</div></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Tabs -->
|
||||||
|
<div class="tabs">
|
||||||
|
<button class="tab active" data-tab="upload">העלאה</button>
|
||||||
|
<button class="tab" data-tab="search">חיפוש</button>
|
||||||
|
<button class="tab" data-tab="browse">פסקי דין</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Upload Panel -->
|
||||||
|
<div class="panel active" id="panel-upload">
|
||||||
|
<div class="card">
|
||||||
|
<h3>העלאת פסק דין</h3>
|
||||||
|
<div class="upload-area" id="drop-zone">
|
||||||
|
<p>גרור קובץ לכאן או לחץ לבחירה</p>
|
||||||
|
<p style="font-size:0.85em;color:#94a3b8;margin-top:8px">PDF, DOCX, RTF, TXT (עד 50MB)</p>
|
||||||
|
<input type="file" id="file-input" accept=".pdf,.docx,.rtf,.txt">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="upload-form" style="display:none; margin-top:16px">
|
||||||
|
<div id="uploaded-file-info" style="margin-bottom:12px;padding:8px;background:#f0fdf4;border-radius:6px"></div>
|
||||||
|
<div class="form-grid">
|
||||||
|
<div class="form-group full">
|
||||||
|
<label>כותרת</label>
|
||||||
|
<input type="text" id="inp-title" placeholder="כותרת תיאורית לפסק הדין">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>מספר תיק</label>
|
||||||
|
<input type="text" id="inp-case-number" placeholder="בל 12345-06-20">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>בית משפט</label>
|
||||||
|
<input type="text" id="inp-court" placeholder="בית הדין האזורי לעבודה ת&quot;א">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>תאריך פסק דין</label>
|
||||||
|
<input type="date" id="inp-date">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>שופט/ת</label>
|
||||||
|
<input type="text" id="inp-judge">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>תובע/מערער</label>
|
||||||
|
<input type="text" id="inp-appellant">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>נתבע/משיב</label>
|
||||||
|
<input type="text" id="inp-respondent" value="המוסד לביטוח לאומי">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>נושאים (מופרדים בפסיקים)</label>
|
||||||
|
<input type="text" id="inp-topics" placeholder="נכות כללית, תאונת עבודה">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>תוצאה</label>
|
||||||
|
<select id="inp-outcome">
|
||||||
|
<option value="">לא צוין</option>
|
||||||
|
<option value="accepted">התקבלה</option>
|
||||||
|
<option value="rejected">נדחתה</option>
|
||||||
|
<option value="partial">התקבלה חלקית</option>
|
||||||
|
<option value="remanded">הוחזרה לדיון</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div style="margin-top:16px;display:flex;gap:8px">
|
||||||
|
<button class="btn btn-primary" id="btn-process">העלה ועבד</button>
|
||||||
|
<button class="btn" id="btn-cancel" style="background:#e2e8f0">ביטול</button>
|
||||||
|
</div>
|
||||||
|
<div id="progress-area" style="display:none;margin-top:12px">
|
||||||
|
<div class="progress-bar"><div class="fill" style="width:100%"></div></div>
|
||||||
|
<p id="progress-text" style="font-size:0.85em;color:#64748b;margin-top:4px">מעבד...</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Search Panel -->
|
||||||
|
<div class="panel" id="panel-search">
|
||||||
|
<div class="card">
|
||||||
|
<h3>חיפוש סמנטי</h3>
|
||||||
|
<div style="display:flex;gap:8px;margin-bottom:12px">
|
||||||
|
<input type="text" id="search-query" placeholder="הזן שאילתת חיפוש..." style="flex:1">
|
||||||
|
<button class="btn btn-primary" id="btn-search">חפש</button>
|
||||||
|
</div>
|
||||||
|
<div class="form-grid" style="margin-bottom:12px">
|
||||||
|
<div class="form-group">
|
||||||
|
<label>בית משפט</label>
|
||||||
|
<input type="text" id="search-court" placeholder="סינון לפי בית משפט">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>נושא</label>
|
||||||
|
<input type="text" id="search-topic" placeholder="סינון לפי נושא">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label>תוצאה</label>
|
||||||
|
<select id="search-outcome">
|
||||||
|
<option value="">הכל</option>
|
||||||
|
<option value="accepted">התקבלה</option>
|
||||||
|
<option value="rejected">נדחתה</option>
|
||||||
|
<option value="partial">חלקית</option>
|
||||||
|
<option value="remanded">הוחזרה</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="search-results">
|
||||||
|
<div class="empty-state">הזן שאילתה לחיפוש בפסקי הדין</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Browse Panel -->
|
||||||
|
<div class="panel" id="panel-browse">
|
||||||
|
<div class="card">
|
||||||
|
<h3>רשימת פסקי דין</h3>
|
||||||
|
<div id="decisions-list">
|
||||||
|
<div class="empty-state">טוען...</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// ── State ──
|
||||||
|
let uploadedFilename = null;
|
||||||
|
|
||||||
|
// ── Tabs ──
|
||||||
|
// Tab switching: clicking a tab deactivates every tab and panel, activates
// the clicked tab together with its "panel-<name>" element, and refreshes
// the decision list when the browse tab is opened.
for (const tabButton of document.querySelectorAll('.tab')) {
  tabButton.addEventListener('click', function () {
    for (const other of document.querySelectorAll('.tab')) {
      other.classList.remove('active');
    }
    for (const panel of document.querySelectorAll('.panel')) {
      panel.classList.remove('active');
    }

    const target = tabButton.dataset.tab;
    tabButton.classList.add('active');
    document.getElementById('panel-' + target).classList.add('active');

    if (target === 'browse') {
      loadDecisions();
    }
  });
}
|
||||||
|
|
||||||
|
// ── Stats ──
|
||||||
|
/**
 * Fetch /api/stats and write the three counters into the stats boxes.
 * Missing or falsy counters are shown as 0.  Failures (network errors or
 * non-2xx responses) are logged and leave the current display untouched.
 */
async function loadStats() {
  try {
    const res = await fetch('/api/stats');
    // An error response must not be rendered as if it were real statistics.
    if (!res.ok) throw new Error('HTTP ' + res.status);
    const data = await res.json();

    const counters = {
      'stat-decisions': data.total_decisions,
      'stat-chunks': data.total_chunks,
      'stat-completed': data.completed_decisions,
    };
    for (const [elementId, value] of Object.entries(counters)) {
      document.getElementById(elementId).textContent = value || 0;
    }
  } catch (e) {
    console.error('Failed to load stats', e);
  }
}
|
||||||
|
loadStats();
|
||||||
|
|
||||||
|
// ── Upload ──
|
||||||
|
// Upload widgets: the drop zone forwards clicks to the hidden file input
// and accepts a dragged file; either path hands the first file to handleFile.
const dropZone = document.getElementById('drop-zone');
const fileInput = document.getElementById('file-input');

dropZone.addEventListener('click', function () {
  fileInput.click();
});

dropZone.addEventListener('dragover', function (event) {
  // preventDefault is required for the element to be a valid drop target.
  event.preventDefault();
  dropZone.classList.add('dragover');
});

dropZone.addEventListener('dragleave', function () {
  dropZone.classList.remove('dragover');
});

dropZone.addEventListener('drop', function (event) {
  event.preventDefault();
  dropZone.classList.remove('dragover');
  const dropped = event.dataTransfer.files;
  if (dropped.length) {
    handleFile(dropped[0]);
  }
});

fileInput.addEventListener('change', function () {
  const chosen = fileInput.files;
  if (chosen.length) {
    handleFile(chosen[0]);
  }
});
|
||||||
|
|
||||||
|
/**
 * Upload a file to /api/upload and, on success, reveal the metadata form.
 *
 * Stores the server-side filename in `uploadedFilename` and shows the
 * original name and size (in KB) above the form.  On failure an alert with
 * the error message is shown.
 *
 * @param {File} file - The file picked or dropped by the user.
 */
async function handleFile(file) {
  const formData = new FormData();
  formData.append('file', file);
  try {
    const res = await fetch('/api/upload', { method: 'POST', body: formData });
    if (!res.ok) throw new Error((await res.json()).detail);
    const data = await res.json();
    uploadedFilename = data.filename;

    // Build the summary with text nodes rather than innerHTML: the file
    // name is user-controlled and must never be parsed as markup.
    const info = document.getElementById('uploaded-file-info');
    const nameEl = document.createElement('strong');
    nameEl.textContent = data.original_name;
    info.textContent = '';
    info.appendChild(nameEl);
    info.appendChild(
      document.createTextNode(` (${(data.size / 1024).toFixed(1)} KB)`)
    );

    document.getElementById('upload-form').style.display = 'block';
    dropZone.style.display = 'none';
  } catch (e) {
    alert('שגיאה בהעלאה: ' + e.message);
  }
}
|
||||||
|
|
||||||
|
// Cancel: hide the metadata form, show the drop zone again and forget the
// pending upload.
document.getElementById('btn-cancel').addEventListener('click', () => {
  document.getElementById('upload-form').style.display = 'none';
  dropZone.style.display = 'block';
  uploadedFilename = null;
  // Clear the file input so that picking the very same file again still
  // fires a "change" event (browsers skip it when the value is unchanged).
  fileInput.value = '';
});
|
||||||
|
|
||||||
|
// Process: send the metadata for the uploaded file to /api/decisions, then
// follow the background task over SSE until it completes or fails.
document.getElementById('btn-process').addEventListener('click', async () => {
  if (!uploadedFilename) return;
  const btn = document.getElementById('btn-process');
  btn.disabled = true;

  // Comma-separated topics -> trimmed, non-empty list.
  const topics = document.getElementById('inp-topics').value
    .split(',').map(t => t.trim()).filter(Boolean);

  const body = {
    filename: uploadedFilename,
    title: document.getElementById('inp-title').value,
    case_number: document.getElementById('inp-case-number').value,
    court: document.getElementById('inp-court').value,
    decision_date: document.getElementById('inp-date').value,
    judge: document.getElementById('inp-judge').value,
    parties_appellant: document.getElementById('inp-appellant').value,
    parties_respondent: document.getElementById('inp-respondent').value,
    topics: topics,
    outcome: document.getElementById('inp-outcome').value,
  };

  try {
    const res = await fetch('/api/decisions', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(body),
    });
    if (!res.ok) throw new Error((await res.json()).detail);
    const data = await res.json();

    const progressText = document.getElementById('progress-text');
    // Reset the label so a previous run's final message is not shown.
    progressText.textContent = 'מעבד...';
    document.getElementById('progress-area').style.display = 'block';

    // Listen to SSE progress
    const evtSource = new EventSource('/api/progress/' + data.task_id);

    // Without an error handler a failed stream would leave the button
    // disabled while the browser keeps reconnecting indefinitely.
    evtSource.onerror = () => {
      evtSource.close();
      progressText.textContent = 'שגיאה: החיבור לשרת נותק';
      btn.disabled = false;
    };

    evtSource.onmessage = (e) => {
      const prog = JSON.parse(e.data);
      progressText.textContent =
        prog.status === 'completed' ? 'הושלם!' :
        prog.status === 'failed' ? 'שגיאה: ' + (prog.error || '') :
        'מעבד... (' + prog.status + ')';

      if (prog.status === 'completed' || prog.status === 'failed') {
        evtSource.close();
        btn.disabled = false;
        loadStats();
        if (prog.status === 'completed') {
          // Leave the success message visible briefly, then reset the UI.
          setTimeout(() => {
            document.getElementById('upload-form').style.display = 'none';
            document.getElementById('progress-area').style.display = 'none';
            dropZone.style.display = 'block';
            uploadedFilename = null;
            // Reset form
            document.querySelectorAll('#upload-form input[type=text], #upload-form input[type=date]').forEach(i => i.value = '');
            document.getElementById('inp-respondent').value = 'המוסד לביטוח לאומי';
            document.getElementById('inp-outcome').value = '';
            // Allow the same file to be selected again.
            fileInput.value = '';
          }, 2000);
        }
      }
    };
  } catch(e) {
    alert('שגיאה: ' + e.message);
    btn.disabled = false;
  }
});
|
||||||
|
|
||||||
|
// ── Search ──
|
||||||
|
// Run the search from the button or by pressing Enter in the query box.
document.getElementById('btn-search').addEventListener('click', doSearch);
document.getElementById('search-query').addEventListener('keypress', function (event) {
  if (event.key !== 'Enter') return;
  doSearch();
});
|
||||||
|
|
||||||
|
/**
 * Run a semantic search against /api/search and render the results.
 *
 * Reads the query and the optional court / topic / outcome filters from
 * the search panel; does nothing when the query is blank.  Every value
 * coming from the server is HTML-escaped before it is placed in markup.
 */
async function doSearch() {
  const q = document.getElementById('search-query').value.trim();
  if (!q) return;

  const params = new URLSearchParams({ q });
  const court = document.getElementById('search-court').value;
  const topic = document.getElementById('search-topic').value;
  const outcome = document.getElementById('search-outcome').value;
  if (court) params.append('court', court);
  if (topic) params.append('topic', topic);
  if (outcome) params.append('outcome', outcome);

  const container = document.getElementById('search-results');
  container.innerHTML = '<div class="empty-state">מחפש...</div>';

  try {
    const res = await fetch('/api/search?' + params);
    if (!res.ok) throw new Error((await res.json()).detail);
    const results = await res.json();

    if (!results.length) {
      container.innerHTML = '<div class="empty-state">לא נמצאו תוצאות</div>';
      return;
    }

    container.innerHTML = results.map(r => `
      <div class="result-item">
        <span class="result-score">${(r.score * 100).toFixed(1)}%</span>
        <strong>${escapeHtml(r.title || 'ללא כותרת')}</strong>
        <div class="result-meta">
          ${r.case_number ? escapeHtml(r.case_number) + ' | ' : ''}
          ${escapeHtml(r.court || '')}
          ${r.decision_date ? ' | ' + escapeHtml(r.decision_date) : ''}
          ${r.judge ? ' | שופט: ' + escapeHtml(r.judge) : ''}
          ${r.outcome ? ' | ' + escapeHtml(outcomeHeb(r.outcome)) : ''}
        </div>
        <div class="result-content">${escapeHtml(String(r.content || '').substring(0, 500))}</div>
      </div>
    `).join('');
    // Note: content is truncated *before* escaping so that an entity such
    // as "&amp;" can never be cut in half.
  } catch(e) {
    container.innerHTML = `<div class="empty-state">שגיאה: ${escapeHtml(e.message)}</div>`;
  }
}
|
||||||
|
|
||||||
|
// ── Browse ──
|
||||||
|
/**
 * Fetch /api/decisions and render the list as a table-like grid.
 *
 * Shows an empty-state message when there are no decisions and an error
 * message when the request fails.  Every value coming from the server is
 * HTML-escaped before it is placed in markup.
 */
async function loadDecisions() {
  const container = document.getElementById('decisions-list');
  try {
    const res = await fetch('/api/decisions');
    // Without this check an error body would be rendered as "no decisions".
    if (!res.ok) throw new Error((await res.json()).detail);
    const decisions = await res.json();

    if (!decisions.length) {
      container.innerHTML = '<div class="empty-state">אין פסקי דין במערכת</div>';
      return;
    }

    const rows = decisions.map(d => {
      const status = d.extraction_status || 'pending';
      // The status is also used inside a class attribute; keep only
      // characters that are safe in a class name.
      const statusClass = String(status).replace(/[^\w-]/g, '');
      const topics = (d.topics || [])
        .map(t => '<span class="tag">' + escapeHtml(t) + '</span>')
        .join('');
      return `
      <div class="decision-row">
        <span>
          <strong>${escapeHtml(d.title || 'ללא כותרת')}</strong>
          ${d.case_number ? '<br><small style="color:#64748b">' + escapeHtml(d.case_number) + '</small>' : ''}
          ${topics}
        </span>
        <span>${escapeHtml(d.court || '-')}</span>
        <span>${escapeHtml(d.decision_date || '-')}</span>
        <span>${d.outcome ? escapeHtml(outcomeHeb(d.outcome)) : '-'}</span>
        <span>
          <span class="status-badge status-${statusClass}">${escapeHtml(status)}</span>
        </span>
      </div>
    `;
    }).join('');

    container.innerHTML = `
      <div class="decision-row decision-header">
        <span>כותרת</span><span>בית משפט</span><span>תאריך</span><span>תוצאה</span><span></span>
      </div>
      ${rows}
    `;
  } catch(e) {
    container.innerHTML = `<div class="empty-state">שגיאה: ${escapeHtml(e.message)}</div>`;
  }
}
|
||||||
|
|
||||||
|
// ── Helpers ──
|
||||||
|
/**
 * Translate an outcome code into its Hebrew label.
 *
 * @param {string} o - Outcome code (accepted / rejected / partial / remanded).
 * @returns {string} The Hebrew label, or `o` itself for an unknown code.
 */
function outcomeHeb(o) {
  const labels = {
    accepted: 'התקבלה',
    rejected: 'נדחתה',
    partial: 'חלקית',
    remanded: 'הוחזרה',
  };
  // Own-property check: a plain lookup would return inherited members such
  // as Object.prototype.toString for codes like "toString".
  return Object.prototype.hasOwnProperty.call(labels, o) ? labels[o] : o;
}
|
||||||
|
/**
 * Escape a value for safe inclusion in HTML text or attribute values.
 *
 * Escapes & < > as well as both quote characters, so the result is safe
 * inside quoted attributes too.  null / undefined become an empty string;
 * any other value is converted with String().
 *
 * @param {*} s - Value to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(s) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  const text = s == null ? '' : String(s);
  return text.replace(/[&<>"']/g, ch => entities[ch]);
}
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Reference in New Issue
Block a user