Initial commit: din-leumi MCP server + web app

MCP server with 7 tools for cataloging and searching
National Insurance court decisions with pgvector semantic search.
Web interface for upload, search, and browse.
This commit is contained in:
Chaim
2026-03-25 15:49:03 +00:00
commit 5c1fdd643f
20 changed files with 2190 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
__pycache__/
*.pyc
.venv/
*.egg-info/
dist/
build/
data/uploads/
data/decisions/
.env

14
.mcp.json Normal file
View File

@@ -0,0 +1,14 @@
{
"mcpServers": {
"din-leumi": {
"type": "stdio",
"command": "/home/chaim/din-leumi/mcp-server/.venv/bin/python",
"args": ["-m", "din_leumi.server"],
"cwd": "/home/chaim/din-leumi/mcp-server",
"env": {
"DOTENV_PATH": "/home/chaim/.env",
"DIN_LEUMI_DATA_DIR": "/home/chaim/din-leumi/data"
}
}
}
}

32
CLAUDE.md Normal file
View File

@@ -0,0 +1,32 @@
# דין לאומי (Din Leumi)
מערכת לקטלוג וחיפוש סמנטי של פסקי דין בתחום ביטוח לאומי.
## כלי MCP זמינים
### ניהול פסקי דין
- `decision_upload` - העלאה ועיבוד פסק דין (PDF/DOCX/RTF/TXT) עם מטאדאטא
- `decision_get` - פרטי פסק דין מלאים כולל טקסט מחולץ
- `decision_list` - רשימת פסקי דין עם סינון (בית משפט, נושא, שופט, תאריכים, תוצאה)
- `decision_update` - עדכון מטאדאטא של פסק דין
- `decision_delete` - מחיקת פסק דין
### חיפוש
- `decision_search` - חיפוש סמנטי עם סינון מטאדאטא
### מערכת
- `system_status` - סטטיסטיקות מערכת
## נושאים נפוצים
נכות כללית, נכות מעבודה, תאונת עבודה, דמי לידה, דמי אבטלה, גמלת הבטחת הכנסה, גמלת ניידות, גמלת סיעוד, קצבת זקנה, קצבת שאירים
## תוצאות אפשריות
- `accepted` - התקבלה
- `rejected` - נדחתה
- `partial` - התקבלה חלקית
- `remanded` - הוחזרה לדיון מחדש
## תהליך עבודה
1. העלאת פסק דין: `decision_upload` עם נתיב לקובץ + מטאדאטא
2. חיפוש תקדימים: `decision_search` עם שאילתה + סינון אופציונלי
3. צפייה בפסק דין: `decision_get` לקריאת הטקסט המלא

26
Dockerfile Normal file
View File

@@ -0,0 +1,26 @@
FROM python:3.12-slim
WORKDIR /app
# System deps for PyMuPDF and document processing
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libmupdf-dev libfreetype6-dev libharfbuzz-dev libjpeg62-turbo-dev \
libopenjp2-7-dev curl && rm -rf /var/lib/apt/lists/*
# Copy MCP server source (for importing services)
COPY mcp-server/pyproject.toml /app/mcp-server/pyproject.toml
COPY mcp-server/src/ /app/mcp-server/src/
# Install MCP server dependencies + web deps
RUN pip install --no-cache-dir /app/mcp-server && \
pip install --no-cache-dir fastapi uvicorn python-multipart
# Copy web app
COPY web/ /app/web/
ENV PYTHONPATH=/app/mcp-server/src
ENV DOTENV_PATH=/home/chaim/.env
EXPOSE 8081
CMD ["uvicorn", "web.app:app", "--host", "0.0.0.0", "--port", "8081"]

25
mcp-server/pyproject.toml Normal file
View File

@@ -0,0 +1,25 @@
[project]
name = "din-leumi"
version = "0.1.0"
description = "MCP server for cataloging and searching National Insurance court decisions"
requires-python = ">=3.10"
dependencies = [
"mcp[cli]>=1.0.0",
"asyncpg>=0.29.0",
"pgvector>=0.3.0",
"voyageai>=0.3.0",
"anthropic>=0.40.0",
"python-dotenv>=1.0.0",
"pydantic>=2.0.0",
"pymupdf>=1.25.0",
"python-docx>=1.1.0",
"striprtf>=0.0.26",
"pillow>=10.0.0",
]
[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]

View File

View File

@@ -0,0 +1,4 @@
"""Allow running with: python -m din_leumi"""
from din_leumi.server import main
main()

View File

@@ -0,0 +1,36 @@
"""Configuration loaded from central .env file."""
import os
from pathlib import Path
from dotenv import load_dotenv
# Load from central .env or override path
dotenv_path = os.environ.get("DOTENV_PATH", str(Path.home() / ".env"))
load_dotenv(dotenv_path)
# PostgreSQL - uses shared server but separate database
POSTGRES_URL = os.environ.get(
"DIN_LEUMI_POSTGRES_URL",
f"postgres://{os.environ.get('POSTGRES_USER', 'legal_ai')}:"
f"{os.environ.get('POSTGRES_PASSWORD', '')}@"
f"{os.environ.get('POSTGRES_HOST', '127.0.0.1')}:"
f"{os.environ.get('POSTGRES_PORT', '5433')}/"
f"{os.environ.get('DIN_LEUMI_POSTGRES_DB', 'din_leumi')}",
)
# Voyage AI
VOYAGE_API_KEY = os.environ.get("VOYAGE_API_KEY", "")
VOYAGE_MODEL = "voyage-3-large"
VOYAGE_DIMENSIONS = 1024
# Anthropic (for Claude Vision OCR)
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
# Data directory
DATA_DIR = Path(os.environ.get("DIN_LEUMI_DATA_DIR", str(Path.home() / "din-leumi" / "data")))
DECISIONS_DIR = DATA_DIR / "decisions"
# Chunking parameters
CHUNK_SIZE_TOKENS = 600
CHUNK_OVERLAP_TOKENS = 100

View File

@@ -0,0 +1,156 @@
"""Din Leumi - MCP Server entry point.
Run with: python -m din_leumi.server
"""
from __future__ import annotations
import logging
import sys
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from mcp.server.fastmcp import FastMCP
# Configure logging to stderr (stdout is reserved for JSON-RPC)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
stream=sys.stderr,
)
logger = logging.getLogger("din_leumi")
@asynccontextmanager
async def lifespan(server: FastMCP) -> AsyncIterator[None]:
"""Initialize DB schema on startup, close pool on shutdown."""
from din_leumi.services.db import close_pool, init_schema
logger.info("Initializing database schema...")
await init_schema()
logger.info("Din Leumi MCP server ready")
try:
yield
finally:
await close_pool()
logger.info("Din Leumi MCP server stopped")
# Create MCP server
mcp = FastMCP(
"Din Leumi - דין לאומי",
instructions="מערכת לקטלוג וחיפוש סמנטי של פסקי דין בתחום ביטוח לאומי",
lifespan=lifespan,
)
# ── Import tool modules ────────────────────────────────────────────
from din_leumi.tools import decisions, search # noqa: E402
# ── Decision management ────────────────────────────────────────────
@mcp.tool()
async def decision_upload(
file_path: str,
title: str = "",
court: str = "",
decision_date: str = "",
case_number: str = "",
judge: str = "",
parties_appellant: str = "",
parties_respondent: str = "המוסד לביטוח לאומי",
topics: list[str] | None = None,
outcome: str = "",
) -> str:
"""העלאה ועיבוד פסק דין של בית דין לעבודה בתחום ביטוח לאומי (PDF/DOCX/RTF/TXT).
מחלץ טקסט, יוצר chunks ו-embeddings לחיפוש סמנטי.
outcome: accepted/rejected/partial/remanded."""
return await decisions.decision_upload(
file_path, title, court, decision_date, case_number,
judge, parties_appellant, parties_respondent, topics, outcome,
)
@mcp.tool()
async def decision_search(
query: str,
limit: int = 10,
court: str = "",
topic: str = "",
date_from: str = "",
date_to: str = "",
outcome: str = "",
) -> str:
"""חיפוש סמנטי בפסקי דין של ביטוח לאומי.
ניתן לסנן לפי בית משפט, נושא, טווח תאריכים ותוצאה.
נושאים נפוצים: נכות כללית, נכות מעבודה, דמי לידה, תאונת עבודה, גמלת הבטחת הכנסה, דמי אבטלה."""
return await search.decision_search(
query, limit, court, topic, date_from, date_to, outcome,
)
@mcp.tool()
async def decision_list(
court: str = "",
topic: str = "",
judge: str = "",
date_from: str = "",
date_to: str = "",
outcome: str = "",
limit: int = 50,
) -> str:
"""רשימת פסקי דין לפי מטאדאטא. סינון אופציונלי לפי בית משפט, נושא, שופט, תאריכים, תוצאה."""
return await decisions.decision_list(
court, topic, judge, date_from, date_to, outcome, limit,
)
@mcp.tool()
async def decision_get(
decision_id: str = "",
case_number: str = "",
) -> str:
"""פרטי פסק דין מלאים כולל טקסט מחולץ. חיפוש לפי decision_id או case_number."""
return await decisions.decision_get(decision_id, case_number)
@mcp.tool()
async def decision_update(
decision_id: str,
title: str = "",
court: str = "",
decision_date: str = "",
case_number: str = "",
judge: str = "",
parties_appellant: str = "",
parties_respondent: str = "",
topics: list[str] | None = None,
outcome: str = "",
summary: str = "",
) -> str:
"""עדכון מטאדאטא של פסק דין."""
return await decisions.decision_update(
decision_id, title, court, decision_date, case_number,
judge, parties_appellant, parties_respondent, topics, outcome, summary,
)
@mcp.tool()
async def decision_delete(decision_id: str) -> str:
"""מחיקת פסק דין וכל ה-chunks שלו מהמערכת."""
return await decisions.decision_delete(decision_id)
@mcp.tool()
async def system_status() -> str:
"""סטטוס מערכת דין לאומי - מספר פסקי דין, chunks, סטטיסטיקות."""
return await search.system_status()
def main():
mcp.run(transport="stdio")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,132 @@
"""Legal document chunker - splits text into sections and chunks for RAG.
Adapted for National Insurance (Bituach Leumi) court decisions.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from din_leumi import config
# Hebrew legal section headers for NI court decisions
SECTION_PATTERNS = [
(r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע|עובדות\s*המקרה", "facts"),
(r"טענות\s*התובע[ת]?|טענות\s*המבוטח[ת]?|טענות\s*המערער[ת]?|עיקר\s*טענות\s*התובע", "claimant_claims"),
(r"טענות\s*הנתבע[ת]?|טענות\s*המוסד|עיקר\s*טענות\s*הנתבע|תשובת\s*המוסד", "respondent_claims"),
(r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|הכרעת\s*הדין", "legal_analysis"),
(r"מסקנ[הות]|סיכום", "conclusion"),
(r"סוף\s*דבר|לסיכום|התוצאה|אשר\s*על\s*כן", "ruling"),
(r"מבוא|פתיחה|לפניי|לפני[נו]?", "intro"),
(r"הדין\s*החל|המסגרת\s*הנורמטיבית|הוראות\s*החוק", "legal_framework"),
]
@dataclass
class Chunk:
content: str
section_type: str = "other"
page_number: int | None = None
chunk_index: int = 0
def chunk_document(
text: str,
chunk_size: int = config.CHUNK_SIZE_TOKENS,
overlap: int = config.CHUNK_OVERLAP_TOKENS,
) -> list[Chunk]:
"""Split a legal document into chunks, respecting section boundaries."""
if not text.strip():
return []
sections = _split_into_sections(text)
chunks: list[Chunk] = []
idx = 0
for section_type, section_text in sections:
section_chunks = _split_section(section_text, chunk_size, overlap)
for chunk_text in section_chunks:
chunks.append(Chunk(
content=chunk_text,
section_type=section_type,
chunk_index=idx,
))
idx += 1
return chunks
def _split_into_sections(text: str) -> list[tuple[str, str]]:
"""Split text into (section_type, text) pairs based on Hebrew headers."""
markers: list[tuple[int, str]] = []
for pattern, section_type in SECTION_PATTERNS:
for match in re.finditer(pattern, text):
markers.append((match.start(), section_type))
if not markers:
return [("other", text)]
markers.sort(key=lambda x: x[0])
sections: list[tuple[str, str]] = []
# Text before first section
if markers[0][0] > 0:
intro_text = text[: markers[0][0]].strip()
if intro_text:
sections.append(("intro", intro_text))
# Each section
for i, (pos, section_type) in enumerate(markers):
end = markers[i + 1][0] if i + 1 < len(markers) else len(text)
section_text = text[pos:end].strip()
if section_text:
sections.append((section_type, section_text))
return sections
def _split_section(text: str, chunk_size: int, overlap: int) -> list[str]:
"""Split a section into overlapping chunks by paragraphs.
Uses approximate token counting (Hebrew ~1.5 chars per token).
"""
if not text.strip():
return []
paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
chunks: list[str] = []
current: list[str] = []
current_tokens = 0
for para in paragraphs:
para_tokens = _estimate_tokens(para)
if current_tokens + para_tokens > chunk_size and current:
chunks.append("\n".join(current))
# Keep overlap
overlap_paras: list[str] = []
overlap_tokens = 0
for p in reversed(current):
pt = _estimate_tokens(p)
if overlap_tokens + pt > overlap:
break
overlap_paras.insert(0, p)
overlap_tokens += pt
current = overlap_paras
current_tokens = overlap_tokens
current.append(para)
current_tokens += para_tokens
if current:
chunks.append("\n".join(current))
return chunks
def _estimate_tokens(text: str) -> int:
"""Rough token estimate for Hebrew text (~1.5 chars per token)."""
return max(1, len(text) // 2)

View File

@@ -0,0 +1,374 @@
"""Database service - asyncpg connection pool and queries for din-leumi."""
from __future__ import annotations
import json
import logging
from datetime import date
from uuid import UUID, uuid4
import asyncpg
from pgvector.asyncpg import register_vector
from din_leumi import config
logger = logging.getLogger(__name__)
_pool: asyncpg.Pool | None = None
async def get_pool() -> asyncpg.Pool:
global _pool
if _pool is None:
conn = await asyncpg.connect(config.POSTGRES_URL)
await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
await conn.close()
_pool = await asyncpg.create_pool(
config.POSTGRES_URL,
min_size=2,
max_size=10,
init=_init_connection,
)
return _pool
async def _init_connection(conn: asyncpg.Connection) -> None:
await register_vector(conn)
async def close_pool() -> None:
global _pool
if _pool:
await _pool.close()
_pool = None
# ── Schema ──────────────────────────────────────────────────────────
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS decisions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
title TEXT NOT NULL,
file_path TEXT NOT NULL,
extracted_text TEXT DEFAULT '',
extraction_status TEXT DEFAULT 'pending',
court TEXT DEFAULT '',
decision_date DATE,
case_number TEXT DEFAULT '',
judge TEXT DEFAULT '',
parties_appellant TEXT DEFAULT '',
parties_respondent TEXT DEFAULT 'המוסד לביטוח לאומי',
topics JSONB DEFAULT '[]',
outcome TEXT DEFAULT '',
summary TEXT DEFAULT '',
page_count INTEGER,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE TABLE IF NOT EXISTS decision_chunks (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
section_type TEXT DEFAULT 'other',
embedding vector(1024),
page_number INTEGER,
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_chunks_decision ON decision_chunks(decision_id);
CREATE INDEX IF NOT EXISTS idx_decisions_court ON decisions(court);
CREATE INDEX IF NOT EXISTS idx_decisions_date ON decisions(decision_date);
CREATE INDEX IF NOT EXISTS idx_decisions_case_number ON decisions(case_number);
CREATE INDEX IF NOT EXISTS idx_decisions_topics ON decisions USING gin(topics);
"""
# IVFFlat index requires data to exist; create after first batch of decisions
IVFFLAT_INDEX_SQL = """
CREATE INDEX IF NOT EXISTS idx_chunks_embedding
ON decision_chunks USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
"""
async def init_schema() -> None:
pool = await get_pool()
async with pool.acquire() as conn:
await conn.execute(SCHEMA_SQL)
# Check if we have enough data for IVFFlat
count = await conn.fetchval("SELECT count(*) FROM decision_chunks")
if count and count > 100:
await conn.execute(IVFFLAT_INDEX_SQL)
logger.info("IVFFlat index created (%d chunks)", count)
logger.info("Database schema initialized")
async def ensure_ivfflat_index() -> None:
"""Create IVFFlat index if enough data exists."""
pool = await get_pool()
async with pool.acquire() as conn:
count = await conn.fetchval("SELECT count(*) FROM decision_chunks")
if count and count > 100:
await conn.execute(IVFFLAT_INDEX_SQL)
# ── Decision CRUD ───────────────────────────────────────────────────
async def create_decision(
title: str,
file_path: str,
court: str = "",
decision_date: date | None = None,
case_number: str = "",
judge: str = "",
parties_appellant: str = "",
parties_respondent: str = "המוסד לביטוח לאומי",
topics: list[str] | None = None,
outcome: str = "",
) -> dict:
pool = await get_pool()
decision_id = uuid4()
async with pool.acquire() as conn:
await conn.execute(
"""INSERT INTO decisions (id, title, file_path, court, decision_date,
case_number, judge, parties_appellant, parties_respondent,
topics, outcome)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
decision_id, title, file_path, court, decision_date,
case_number, judge, parties_appellant, parties_respondent,
json.dumps(topics or []), outcome,
)
return await get_decision(decision_id)
async def get_decision(decision_id: UUID) -> dict | None:
pool = await get_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("SELECT * FROM decisions WHERE id = $1", decision_id)
if row is None:
return None
return _row_to_decision(row)
async def get_decision_by_case_number(case_number: str) -> dict | None:
pool = await get_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT * FROM decisions WHERE case_number = $1", case_number
)
if row is None:
return None
return _row_to_decision(row)
async def list_decisions(
court: str = "",
topic: str = "",
judge: str = "",
date_from: date | None = None,
date_to: date | None = None,
outcome: str = "",
limit: int = 50,
) -> list[dict]:
pool = await get_pool()
conditions = []
params: list = []
idx = 1
if court:
conditions.append(f"court ILIKE ${idx}")
params.append(f"%{court}%")
idx += 1
if topic:
conditions.append(f"topics @> ${idx}::jsonb")
params.append(json.dumps([topic]))
idx += 1
if judge:
conditions.append(f"judge ILIKE ${idx}")
params.append(f"%{judge}%")
idx += 1
if date_from:
conditions.append(f"decision_date >= ${idx}")
params.append(date_from)
idx += 1
if date_to:
conditions.append(f"decision_date <= ${idx}")
params.append(date_to)
idx += 1
if outcome:
conditions.append(f"outcome = ${idx}")
params.append(outcome)
idx += 1
where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
params.append(limit)
sql = f"""
SELECT id, title, court, decision_date, case_number, judge,
parties_appellant, parties_respondent, topics, outcome, summary,
extraction_status, page_count, created_at
FROM decisions
{where}
ORDER BY decision_date DESC NULLS LAST, created_at DESC
LIMIT ${idx}
"""
async with pool.acquire() as conn:
rows = await conn.fetch(sql, *params)
return [_row_to_decision(r) for r in rows]
async def update_decision(decision_id: UUID, **fields) -> dict | None:
if not fields:
return await get_decision(decision_id)
pool = await get_pool()
set_clauses = []
values = []
for i, (key, val) in enumerate(fields.items(), start=2):
if key == "topics":
val = json.dumps(val)
set_clauses.append(f"{key} = ${i}")
values.append(val)
set_clauses.append("updated_at = now()")
sql = f"UPDATE decisions SET {', '.join(set_clauses)} WHERE id = $1"
async with pool.acquire() as conn:
await conn.execute(sql, decision_id, *values)
return await get_decision(decision_id)
async def delete_decision(decision_id: UUID) -> bool:
pool = await get_pool()
async with pool.acquire() as conn:
result = await conn.execute("DELETE FROM decisions WHERE id = $1", decision_id)
return result == "DELETE 1"
def _row_to_decision(row: asyncpg.Record) -> dict:
d = dict(row)
if isinstance(d.get("topics"), str):
d["topics"] = json.loads(d["topics"])
for field in ("id",):
if field in d and d[field] is not None:
d[field] = str(d[field])
return d
# ── Chunks & Vectors ───────────────────────────────────────────────
async def store_chunks(
decision_id: UUID,
chunks: list[dict],
) -> int:
"""Store decision chunks with embeddings."""
pool = await get_pool()
async with pool.acquire() as conn:
await conn.execute(
"DELETE FROM decision_chunks WHERE decision_id = $1", decision_id
)
for chunk in chunks:
await conn.execute(
"""INSERT INTO decision_chunks
(decision_id, chunk_index, content, section_type, embedding, page_number)
VALUES ($1, $2, $3, $4, $5, $6)""",
decision_id,
chunk["chunk_index"],
chunk["content"],
chunk.get("section_type", "other"),
chunk["embedding"],
chunk.get("page_number"),
)
return len(chunks)
async def search_similar(
query_embedding: list[float],
limit: int = 10,
court: str = "",
topic: str = "",
date_from: date | None = None,
date_to: date | None = None,
outcome: str = "",
) -> list[dict]:
"""Cosine similarity search on decision chunks with metadata filtering."""
pool = await get_pool()
conditions = []
params: list = [query_embedding, limit]
idx = 3
if court:
conditions.append(f"d.court ILIKE ${idx}")
params.append(f"%{court}%")
idx += 1
if topic:
conditions.append(f"d.topics @> ${idx}::jsonb")
params.append(json.dumps([topic]))
idx += 1
if date_from:
conditions.append(f"d.decision_date >= ${idx}")
params.append(date_from)
idx += 1
if date_to:
conditions.append(f"d.decision_date <= ${idx}")
params.append(date_to)
idx += 1
if outcome:
conditions.append(f"d.outcome = ${idx}")
params.append(outcome)
idx += 1
where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
sql = f"""
SELECT dc.content, dc.section_type, dc.page_number,
dc.decision_id,
d.title, d.case_number, d.court, d.decision_date,
d.judge, d.outcome,
1 - (dc.embedding <=> $1) AS score
FROM decision_chunks dc
JOIN decisions d ON d.id = dc.decision_id
{where}
ORDER BY dc.embedding <=> $1
LIMIT $2
"""
async with pool.acquire() as conn:
rows = await conn.fetch(sql, *params)
results = []
for r in rows:
d = dict(r)
if d.get("decision_id"):
d["decision_id"] = str(d["decision_id"])
results.append(d)
return results
# ── Stats ──────────────────────────────────────────────────────────
async def get_stats() -> dict:
pool = await get_pool()
async with pool.acquire() as conn:
total_decisions = await conn.fetchval("SELECT count(*) FROM decisions")
total_chunks = await conn.fetchval("SELECT count(*) FROM decision_chunks")
completed = await conn.fetchval(
"SELECT count(*) FROM decisions WHERE extraction_status = 'completed'"
)
courts = await conn.fetch(
"SELECT court, count(*) as cnt FROM decisions WHERE court != '' GROUP BY court ORDER BY cnt DESC LIMIT 10"
)
date_range = await conn.fetchrow(
"SELECT min(decision_date) as earliest, max(decision_date) as latest FROM decisions WHERE decision_date IS NOT NULL"
)
return {
"total_decisions": total_decisions,
"completed_decisions": completed,
"total_chunks": total_chunks,
"courts": [{"court": r["court"], "count": r["cnt"]} for r in courts],
"date_range": {
"earliest": str(date_range["earliest"]) if date_range and date_range["earliest"] else None,
"latest": str(date_range["latest"]) if date_range and date_range["latest"] else None,
} if date_range else None,
}

View File

@@ -0,0 +1,55 @@
"""Embedding service using Voyage AI API."""
from __future__ import annotations
import logging
import voyageai
from din_leumi import config
logger = logging.getLogger(__name__)
_client: voyageai.Client | None = None
def _get_client() -> voyageai.Client:
global _client
if _client is None:
_client = voyageai.Client(api_key=config.VOYAGE_API_KEY)
return _client
async def embed_texts(texts: list[str], input_type: str = "document") -> list[list[float]]:
"""Embed a batch of texts using Voyage AI.
Args:
texts: List of texts to embed (max 128 per call).
input_type: "document" for indexing, "query" for search queries.
Returns:
List of embedding vectors (1024 dimensions each).
"""
if not texts:
return []
client = _get_client()
all_embeddings = []
# Voyage AI supports up to 128 texts per batch
for i in range(0, len(texts), 128):
batch = texts[i : i + 128]
result = client.embed(
batch,
model=config.VOYAGE_MODEL,
input_type=input_type,
)
all_embeddings.extend(result.embeddings)
return all_embeddings
async def embed_query(query: str) -> list[float]:
"""Embed a single search query."""
results = await embed_texts([query], input_type="query")
return results[0]

View File

@@ -0,0 +1,126 @@
"""Text extraction from PDF, DOCX, and RTF files.
Primary PDF extraction: Claude Vision API (for scanned documents).
Fallback: PyMuPDF direct text extraction (for born-digital PDFs).
"""
from __future__ import annotations
import base64
import logging
from pathlib import Path
import anthropic
import fitz # PyMuPDF
from docx import Document as DocxDocument
from striprtf.striprtf import rtf_to_text
from din_leumi import config
logger = logging.getLogger(__name__)
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
return _anthropic_client
async def extract_text(file_path: str) -> tuple[str, int]:
"""Extract text from a document file.
Returns:
Tuple of (extracted_text, page_count).
page_count is 0 for non-PDF files.
"""
path = Path(file_path)
suffix = path.suffix.lower()
if suffix == ".pdf":
return await _extract_pdf(path)
elif suffix == ".docx":
return _extract_docx(path), 0
elif suffix == ".rtf":
return _extract_rtf(path), 0
elif suffix == ".txt":
return path.read_text(encoding="utf-8"), 0
else:
raise ValueError(f"Unsupported file type: {suffix}")
async def _extract_pdf(path: Path) -> tuple[str, int]:
"""Extract text from PDF. Try direct text first, fall back to Claude Vision for scanned pages."""
doc = fitz.open(str(path))
page_count = len(doc)
pages_text: list[str] = []
for page_num in range(page_count):
page = doc[page_num]
# Try direct text extraction first
text = page.get_text().strip()
if len(text) > 50:
# Sufficient text found - born-digital page
pages_text.append(text)
logger.debug("Page %d: direct text extraction (%d chars)", page_num + 1, len(text))
else:
# Likely scanned - use Claude Vision
logger.info("Page %d: using Claude Vision OCR", page_num + 1)
pix = page.get_pixmap(dpi=200)
img_bytes = pix.tobytes("png")
ocr_text = await _ocr_with_claude(img_bytes, page_num + 1)
pages_text.append(ocr_text)
doc.close()
return "\n\n".join(pages_text), page_count
async def _ocr_with_claude(image_bytes: bytes, page_num: int) -> str:
"""OCR a single page image using Claude Vision API."""
client = _get_anthropic()
b64_image = base64.b64encode(image_bytes).decode("utf-8")
message = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=4096,
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": b64_image,
},
},
{
"type": "text",
"text": (
"חלץ את כל הטקסט מהתמונה הזו. זהו מסמך משפטי בעברית. "
"שמור על מבנה הפסקאות המקורי. "
"החזר רק את הטקסט המחולץ, ללא הערות נוספות."
),
},
],
}
],
)
return message.content[0].text
def _extract_docx(path: Path) -> str:
"""Extract text from DOCX file."""
doc = DocxDocument(str(path))
paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
return "\n\n".join(paragraphs)
def _extract_rtf(path: Path) -> str:
"""Extract text from RTF file."""
rtf_content = path.read_text(encoding="utf-8", errors="replace")
return rtf_to_text(rtf_content)

View File

@@ -0,0 +1,82 @@
"""Decision processing pipeline: extract → chunk → embed → store."""
from __future__ import annotations
import logging
from uuid import UUID
from din_leumi.services import chunker, db, embeddings, extractor
logger = logging.getLogger(__name__)
async def process_decision(decision_id: UUID) -> dict:
"""Full processing pipeline for a decision.
1. Extract text from file
2. Split into chunks
3. Generate embeddings
4. Store chunks + embeddings in DB
Returns processing summary.
"""
decision = await db.get_decision(decision_id)
if not decision:
raise ValueError(f"Decision {decision_id} not found")
await db.update_decision(decision_id, extraction_status="processing")
try:
# Step 1: Extract text
logger.info("Extracting text from %s", decision["file_path"])
text, page_count = await extractor.extract_text(decision["file_path"])
await db.update_decision(
decision_id,
extracted_text=text,
page_count=page_count,
)
# Step 2: Chunk
logger.info("Chunking decision (%d chars)", len(text))
chunks = chunker.chunk_document(text)
if not chunks:
await db.update_decision(decision_id, extraction_status="completed")
return {"status": "completed", "chunks": 0, "message": "No text to chunk"}
# Step 3: Embed
logger.info("Generating embeddings for %d chunks", len(chunks))
texts = [c.content for c in chunks]
embs = await embeddings.embed_texts(texts, input_type="document")
# Step 4: Store
chunk_dicts = [
{
"content": c.content,
"section_type": c.section_type,
"embedding": emb,
"page_number": c.page_number,
"chunk_index": c.chunk_index,
}
for c, emb in zip(chunks, embs)
]
stored = await db.store_chunks(decision_id, chunk_dicts)
await db.update_decision(decision_id, extraction_status="completed")
# Try to create IVFFlat index if we have enough data
await db.ensure_ivfflat_index()
logger.info("Decision processed: %d chunks stored", stored)
return {
"status": "completed",
"chunks": stored,
"pages": page_count,
"text_length": len(text),
}
except Exception as e:
logger.exception("Decision processing failed: %s", e)
await db.update_decision(decision_id, extraction_status="failed")
return {"status": "failed", "error": str(e)}

View File

@@ -0,0 +1,241 @@
"""Decision CRUD tool implementations."""
from __future__ import annotations
import json
import shutil
from datetime import date
from pathlib import Path
from uuid import UUID
from din_leumi import config
from din_leumi.services import db, processor
async def decision_upload(
file_path: str,
title: str = "",
court: str = "",
decision_date: str = "",
case_number: str = "",
judge: str = "",
parties_appellant: str = "",
parties_respondent: str = "המוסד לביטוח לאומי",
topics: list[str] | None = None,
outcome: str = "",
) -> str:
"""Upload and process a court decision."""
source = Path(file_path)
if not source.exists():
return f"❌ הקובץ לא נמצא: {file_path}"
ext = source.suffix.lower()
if ext not in (".pdf", ".docx", ".rtf", ".txt"):
return f"❌ סוג קובץ לא נתמך: {ext}"
# Copy to decisions directory
config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True)
dest = config.DECISIONS_DIR / source.name
if dest.exists():
# Add suffix to avoid overwrite
dest = config.DECISIONS_DIR / f"{source.stem}_{UUID.__class__.__name__[:8]}{ext}"
shutil.copy2(str(source), str(dest))
# Parse date
d_date = None
if decision_date:
try:
d_date = date.fromisoformat(decision_date)
except ValueError:
return f"❌ פורמט תאריך לא תקין: {decision_date}. נדרש YYYY-MM-DD"
# Create decision record
if not title:
title = source.stem
decision = await db.create_decision(
title=title,
file_path=str(dest),
court=court,
decision_date=d_date,
case_number=case_number,
judge=judge,
parties_appellant=parties_appellant,
parties_respondent=parties_respondent,
topics=topics,
outcome=outcome,
)
# Process (extract → chunk → embed → store)
result = await processor.process_decision(UUID(decision["id"]))
status_icon = "" if result["status"] == "completed" else ""
lines = [
f"{status_icon} פסק דין הועלה ועובד",
f" כותרת: {title}",
f" מזהה: {decision['id']}",
]
if case_number:
lines.append(f" מספר תיק: {case_number}")
if result.get("chunks"):
lines.append(f" chunks: {result['chunks']}")
if result.get("pages"):
lines.append(f" עמודים: {result['pages']}")
if result.get("error"):
lines.append(f" שגיאה: {result['error']}")
return "\n".join(lines)
async def decision_get(
decision_id: str = "",
case_number: str = "",
) -> str:
"""Get full decision details."""
if not decision_id and not case_number:
return "❌ נדרש decision_id או case_number"
if decision_id:
decision = await db.get_decision(UUID(decision_id))
else:
decision = await db.get_decision_by_case_number(case_number)
if not decision:
return "❌ פסק דין לא נמצא"
lines = [
f"📄 {decision.get('title', '')}",
f" מזהה: {decision['id']}",
]
if decision.get("case_number"):
lines.append(f" מספר תיק: {decision['case_number']}")
if decision.get("court"):
lines.append(f" בית משפט: {decision['court']}")
if decision.get("decision_date"):
lines.append(f" תאריך: {decision['decision_date']}")
if decision.get("judge"):
lines.append(f" שופט: {decision['judge']}")
if decision.get("parties_appellant"):
lines.append(f" תובע/מערער: {decision['parties_appellant']}")
if decision.get("parties_respondent"):
lines.append(f" נתבע/משיב: {decision['parties_respondent']}")
if decision.get("topics"):
topics = decision["topics"]
if isinstance(topics, str):
topics = json.loads(topics)
if topics:
lines.append(f" נושאים: {', '.join(topics)}")
if decision.get("outcome"):
lines.append(f" תוצאה: {decision['outcome']}")
if decision.get("summary"):
lines.append(f" תקציר: {decision['summary']}")
lines.append(f" סטטוס: {decision.get('extraction_status', 'unknown')}")
if decision.get("page_count"):
lines.append(f" עמודים: {decision['page_count']}")
# Include extracted text (truncated)
text = decision.get("extracted_text", "")
if text:
lines.append("")
lines.append("── טקסט מחולץ ──")
if len(text) > 15000:
lines.append(text[:15000])
lines.append(f"\n... (נקטע, סה\"כ {len(text)} תווים)")
else:
lines.append(text)
return "\n".join(lines)
async def decision_list(
court: str = "",
topic: str = "",
judge: str = "",
date_from: str = "",
date_to: str = "",
outcome: str = "",
limit: int = 50,
) -> str:
"""List decisions with optional filters."""
d_from = date.fromisoformat(date_from) if date_from else None
d_to = date.fromisoformat(date_to) if date_to else None
decisions = await db.list_decisions(
court=court, topic=topic, judge=judge,
date_from=d_from, date_to=d_to,
outcome=outcome, limit=limit,
)
if not decisions:
return "לא נמצאו פסקי דין"
lines = [f"נמצאו {len(decisions)} פסקי דין:\n"]
for d in decisions:
parts = [f"{d.get('title', 'ללא כותרת')}"]
if d.get("case_number"):
parts.append(f" [{d['case_number']}]")
if d.get("court"):
parts.append(f" {d['court']}")
if d.get("decision_date"):
parts.append(f" {d['decision_date']}")
if d.get("outcome"):
parts.append(f" ({d['outcome']})")
lines.append(" ".join(parts))
lines.append(f" מזהה: {d['id']}")
return "\n".join(lines)
async def decision_update(
decision_id: str,
title: str = "",
court: str = "",
decision_date: str = "",
case_number: str = "",
judge: str = "",
parties_appellant: str = "",
parties_respondent: str = "",
topics: list[str] | None = None,
outcome: str = "",
summary: str = "",
) -> str:
"""Update decision metadata."""
fields = {}
if title:
fields["title"] = title
if court:
fields["court"] = court
if decision_date:
fields["decision_date"] = date.fromisoformat(decision_date)
if case_number:
fields["case_number"] = case_number
if judge:
fields["judge"] = judge
if parties_appellant:
fields["parties_appellant"] = parties_appellant
if parties_respondent:
fields["parties_respondent"] = parties_respondent
if topics is not None:
fields["topics"] = topics
if outcome:
fields["outcome"] = outcome
if summary:
fields["summary"] = summary
if not fields:
return "❌ לא צוינו שדות לעדכון"
result = await db.update_decision(UUID(decision_id), **fields)
if not result:
return "❌ פסק דין לא נמצא"
return f"✅ פסק דין {decision_id} עודכן ({', '.join(fields.keys())})"
async def decision_delete(decision_id: str) -> str:
"""Delete a decision and all its chunks."""
deleted = await db.delete_decision(UUID(decision_id))
if deleted:
return f"✅ פסק דין {decision_id} נמחק"
return "❌ פסק דין לא נמצא"

View File

@@ -0,0 +1,97 @@
"""Search tool implementations."""
from __future__ import annotations
import json
from datetime import date
from din_leumi.services import db, embeddings
async def decision_search(
query: str,
limit: int = 10,
court: str = "",
topic: str = "",
date_from: str = "",
date_to: str = "",
outcome: str = "",
) -> str:
"""Semantic search across all decisions with optional metadata filters."""
if not query.strip():
return "❌ נדרש טקסט לחיפוש"
# Embed query
query_emb = await embeddings.embed_query(query)
d_from = date.fromisoformat(date_from) if date_from else None
d_to = date.fromisoformat(date_to) if date_to else None
results = await db.search_similar(
query_embedding=query_emb,
limit=limit,
court=court,
topic=topic,
date_from=d_from,
date_to=d_to,
outcome=outcome,
)
if not results:
return "לא נמצאו תוצאות"
lines = [f"🔍 נמצאו {len(results)} תוצאות עבור: \"{query}\"\n"]
for i, r in enumerate(results, 1):
score = r.get("score", 0)
lines.append(f"── תוצאה {i} (ציון: {score:.3f}) ──")
lines.append(f" פסק דין: {r.get('title', '')}")
if r.get("case_number"):
lines.append(f" מספר תיק: {r['case_number']}")
if r.get("court"):
lines.append(f" בית משפט: {r['court']}")
if r.get("decision_date"):
lines.append(f" תאריך: {r['decision_date']}")
if r.get("judge"):
lines.append(f" שופט: {r['judge']}")
if r.get("outcome"):
lines.append(f" תוצאה: {r['outcome']}")
lines.append(f" סוג קטע: {r.get('section_type', 'other')}")
lines.append(f" מזהה: {r.get('decision_id', '')}")
lines.append("")
# Show content snippet
content = r.get("content", "")
if len(content) > 500:
content = content[:500] + "..."
lines.append(f" {content}")
lines.append("")
return "\n".join(lines)
async def system_status() -> str:
"""Get system statistics."""
stats = await db.get_stats()
lines = [
"📊 סטטוס מערכת דין לאומי",
"",
f" סה\"כ פסקי דין: {stats['total_decisions']}",
f" עובדו בהצלחה: {stats['completed_decisions']}",
f" סה\"כ chunks: {stats['total_chunks']}",
]
if stats.get("courts"):
lines.append("")
lines.append(" בתי משפט:")
for c in stats["courts"]:
lines.append(f"{c['court']}: {c['count']}")
if stats.get("date_range"):
dr = stats["date_range"]
if dr.get("earliest"):
lines.append("")
lines.append(f" טווח תאריכים: {dr['earliest']}{dr['latest']}")
return "\n".join(lines)

289
web/app.py Normal file
View File

@@ -0,0 +1,289 @@
"""Din Leumi — Web interface for uploading and searching court decisions."""
from __future__ import annotations
import asyncio
import json
import logging
import re
import shutil
import sys
import time
from contextlib import asynccontextmanager
from datetime import date
from pathlib import Path
from uuid import UUID, uuid4
# Allow importing din_leumi from the MCP server source
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from din_leumi import config
from din_leumi.services import db, processor
logger = logging.getLogger(__name__)
UPLOAD_DIR = config.DATA_DIR / "uploads"
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".rtf", ".txt"}
MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB
# In-memory progress tracking
_progress: dict[str, dict] = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True)
await db.init_schema()
yield
await db.close_pool()
app = FastAPI(title="Din Leumi — דין לאומי", lifespan=lifespan)
STATIC_DIR = Path(__file__).parent / "static"
# ── Health ─────────────────────────────────────────────────────────
@app.get("/health")
async def health():
return {"status": "ok"}
# ── Pages ──────────────────────────────────────────────────────────
@app.get("/")
async def index():
return FileResponse(STATIC_DIR / "index.html")
# ── Upload API ─────────────────────────────────────────────────────
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
"""Upload a file to the temporary uploads directory."""
if not file.filename:
raise HTTPException(400, "No filename provided")
ext = Path(file.filename).suffix.lower()
if ext not in ALLOWED_EXTENSIONS:
raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}")
safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem)
if not safe_name:
safe_name = "document"
timestamp = int(time.time())
filename = f"{timestamp}_{safe_name}{ext}"
content = await file.read()
if len(content) > MAX_FILE_SIZE:
raise HTTPException(400, f"הקובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")
dest = UPLOAD_DIR / filename
dest.write_bytes(content)
return {
"filename": filename,
"original_name": file.filename,
"size": len(content),
}
# ── Decision API ───────────────────────────────────────────────────
class DecisionCreateRequest(BaseModel):
filename: str
title: str = ""
court: str = ""
decision_date: str = ""
case_number: str = ""
judge: str = ""
parties_appellant: str = ""
parties_respondent: str = "המוסד לביטוח לאומי"
topics: list[str] = []
outcome: str = ""
@app.post("/api/decisions")
async def create_decision(req: DecisionCreateRequest):
"""Create a decision record and start processing."""
source = UPLOAD_DIR / req.filename
if not source.exists() or not source.parent.samefile(UPLOAD_DIR):
raise HTTPException(404, "קובץ לא נמצא")
# Copy to decisions directory
original_name = re.sub(r"^\d+_", "", source.name)
dest = config.DECISIONS_DIR / original_name
if dest.exists():
stem = dest.stem
dest = config.DECISIONS_DIR / f"{stem}_{int(time.time())}{dest.suffix}"
shutil.copy2(str(source), str(dest))
# Parse date
d_date = None
if req.decision_date:
try:
d_date = date.fromisoformat(req.decision_date)
except ValueError:
raise HTTPException(400, f"פורמט תאריך לא תקין: {req.decision_date}")
title = req.title or original_name.rsplit(".", 1)[0]
# Create DB record
decision = await db.create_decision(
title=title,
file_path=str(dest),
court=req.court,
decision_date=d_date,
case_number=req.case_number,
judge=req.judge,
parties_appellant=req.parties_appellant,
parties_respondent=req.parties_respondent,
topics=req.topics if req.topics else None,
outcome=req.outcome,
)
task_id = str(uuid4())
_progress[task_id] = {"status": "queued", "filename": req.filename}
# Process in background
asyncio.create_task(_process_decision(task_id, decision, source))
return {"task_id": task_id, "decision_id": decision["id"]}
@app.get("/api/decisions")
async def list_decisions(
court: str = "",
topic: str = "",
judge: str = "",
date_from: str = "",
date_to: str = "",
outcome: str = "",
limit: int = 50,
):
"""List decisions with optional filters."""
d_from = date.fromisoformat(date_from) if date_from else None
d_to = date.fromisoformat(date_to) if date_to else None
decisions = await db.list_decisions(
court=court, topic=topic, judge=judge,
date_from=d_from, date_to=d_to,
outcome=outcome, limit=limit,
)
return decisions
@app.get("/api/decisions/{decision_id}")
async def get_decision(decision_id: str):
"""Get a single decision."""
decision = await db.get_decision(UUID(decision_id))
if not decision:
raise HTTPException(404, "פסק דין לא נמצא")
return decision
@app.delete("/api/decisions/{decision_id}")
async def delete_decision(decision_id: str):
"""Delete a decision."""
deleted = await db.delete_decision(UUID(decision_id))
if not deleted:
raise HTTPException(404, "פסק דין לא נמצא")
return {"deleted": decision_id}
# ── Search API ─────────────────────────────────────────────────────
@app.get("/api/search")
async def search_decisions(
q: str = "",
court: str = "",
topic: str = "",
date_from: str = "",
date_to: str = "",
outcome: str = "",
limit: int = 10,
):
"""Semantic search across decisions."""
if not q.strip():
raise HTTPException(400, "נדרש טקסט לחיפוש")
from din_leumi.services import embeddings
query_emb = await embeddings.embed_query(q)
d_from = date.fromisoformat(date_from) if date_from else None
d_to = date.fromisoformat(date_to) if date_to else None
results = await db.search_similar(
query_embedding=query_emb,
limit=limit,
court=court,
topic=topic,
date_from=d_from,
date_to=d_to,
outcome=outcome,
)
# Serialize dates
for r in results:
if r.get("decision_date"):
r["decision_date"] = str(r["decision_date"])
return results
# ── Progress SSE ───────────────────────────────────────────────────
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
"""SSE stream of processing progress."""
if task_id not in _progress:
raise HTTPException(404, "Task not found")
async def event_stream():
while True:
data = _progress.get(task_id, {})
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
if data.get("status") in ("completed", "failed"):
break
await asyncio.sleep(1)
await asyncio.sleep(30)
_progress.pop(task_id, None)
return StreamingResponse(event_stream(), media_type="text/event-stream")
@app.get("/api/stats")
async def stats():
"""System statistics."""
return await db.get_stats()
# ── Background Processing ─────────────────────────────────────────
async def _process_decision(task_id: str, decision: dict, source: Path):
"""Process a decision in the background."""
try:
_progress[task_id] = {"status": "processing", "filename": source.name}
result = await processor.process_decision(UUID(decision["id"]))
source.unlink(missing_ok=True)
_progress[task_id] = {
"status": result.get("status", "completed"),
"filename": source.name,
"decision_id": decision["id"],
"result": result,
}
except Exception as e:
logger.exception("Processing failed for %s", source.name)
_progress[task_id] = {
"status": "failed",
"error": str(e),
"filename": source.name,
}

492
web/static/index.html Normal file
View File

@@ -0,0 +1,492 @@
<!DOCTYPE html>
<html lang="he" dir="rtl">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>דין לאומי - קטלוג פסקי דין</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
background: #f5f7fa; color: #1a1a2e; line-height: 1.6;
}
.container { max-width: 1100px; margin: 0 auto; padding: 20px; }
header {
background: linear-gradient(135deg, #1a365d, #2d5086);
color: white; padding: 24px 0; margin-bottom: 24px;
}
header h1 { font-size: 1.8em; }
header p { opacity: 0.8; margin-top: 4px; }
.tabs {
display: flex; gap: 0; margin-bottom: 24px;
border-bottom: 2px solid #e2e8f0;
}
.tab {
padding: 10px 24px; cursor: pointer; border: none;
background: none; font-size: 1em; color: #64748b;
border-bottom: 2px solid transparent; margin-bottom: -2px;
}
.tab.active { color: #1a365d; border-bottom-color: #1a365d; font-weight: 600; }
.tab:hover { color: #1a365d; }
.panel { display: none; }
.panel.active { display: block; }
/* Cards */
.card {
background: white; border-radius: 8px; padding: 20px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1); margin-bottom: 16px;
}
.card h3 { margin-bottom: 12px; color: #1a365d; }
/* Forms */
.form-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
.form-group { display: flex; flex-direction: column; gap: 4px; }
.form-group.full { grid-column: 1 / -1; }
label { font-size: 0.85em; font-weight: 600; color: #475569; }
input, select, textarea {
padding: 8px 12px; border: 1px solid #d1d5db; border-radius: 6px;
font-size: 0.95em; font-family: inherit;
}
input:focus, select:focus, textarea:focus {
outline: none; border-color: #2d5086; box-shadow: 0 0 0 2px rgba(45,80,134,0.15);
}
/* Buttons */
.btn {
padding: 10px 20px; border: none; border-radius: 6px;
font-size: 0.95em; cursor: pointer; font-weight: 600;
}
.btn-primary { background: #1a365d; color: white; }
.btn-primary:hover { background: #2d5086; }
.btn-danger { background: #dc2626; color: white; }
.btn-danger:hover { background: #b91c1c; }
.btn:disabled { opacity: 0.6; cursor: not-allowed; }
/* Upload area */
.upload-area {
border: 2px dashed #d1d5db; border-radius: 8px; padding: 40px;
text-align: center; cursor: pointer; transition: all 0.2s;
}
.upload-area:hover, .upload-area.dragover {
border-color: #2d5086; background: #f0f4ff;
}
.upload-area input { display: none; }
/* Results */
.result-item {
border: 1px solid #e2e8f0; border-radius: 6px; padding: 12px;
margin-bottom: 8px;
}
.result-item:hover { border-color: #2d5086; }
.result-score {
display: inline-block; background: #e0e7ff; color: #3730a3;
padding: 2px 8px; border-radius: 4px; font-size: 0.8em; font-weight: 600;
}
.result-meta { font-size: 0.85em; color: #64748b; margin: 4px 0; }
.result-content {
font-size: 0.9em; color: #374151; margin-top: 8px;
background: #f9fafb; padding: 8px; border-radius: 4px;
max-height: 150px; overflow-y: auto;
}
/* Decision list */
.decision-row {
display: grid; grid-template-columns: 2fr 1.5fr 1fr 1fr 80px;
gap: 8px; padding: 10px 12px; align-items: center;
border-bottom: 1px solid #f1f5f9; font-size: 0.9em;
}
.decision-row:hover { background: #f8fafc; }
.decision-header { font-weight: 600; color: #475569; background: #f1f5f9; border-radius: 6px 6px 0 0; }
/* Tags */
.tag {
display: inline-block; background: #e0f2fe; color: #0369a1;
padding: 2px 8px; border-radius: 12px; font-size: 0.8em; margin: 2px;
}
/* Progress */
.progress-bar {
height: 4px; background: #e2e8f0; border-radius: 2px; overflow: hidden;
}
.progress-bar .fill {
height: 100%; background: #2d5086; transition: width 0.3s;
animation: pulse 1.5s infinite;
}
@keyframes pulse { 50% { opacity: 0.6; } }
.status-badge {
display: inline-block; padding: 2px 8px; border-radius: 4px;
font-size: 0.8em; font-weight: 600;
}
.status-completed { background: #d1fae5; color: #065f46; }
.status-processing { background: #fef3c7; color: #92400e; }
.status-failed { background: #fee2e2; color: #991b1b; }
.stats-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; margin-bottom: 16px; }
.stat-box {
background: white; border-radius: 8px; padding: 16px;
text-align: center; box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.stat-number { font-size: 2em; font-weight: 700; color: #1a365d; }
.stat-label { font-size: 0.85em; color: #64748b; }
.empty-state { text-align: center; padding: 40px; color: #94a3b8; }
</style>
</head>
<body>
<header>
<div class="container">
<h1>דין לאומי</h1>
<p>קטלוג וחיפוש סמנטי של פסקי דין בתחום ביטוח לאומי</p>
</div>
</header>
<div class="container">
<!-- Stats -->
<div class="stats-grid" id="stats-grid">
<div class="stat-box"><div class="stat-number" id="stat-decisions">-</div><div class="stat-label">פסקי דין</div></div>
<div class="stat-box"><div class="stat-number" id="stat-chunks">-</div><div class="stat-label">chunks</div></div>
<div class="stat-box"><div class="stat-number" id="stat-completed">-</div><div class="stat-label">עובדו בהצלחה</div></div>
</div>
<!-- Tabs -->
<div class="tabs">
<button class="tab active" data-tab="upload">העלאה</button>
<button class="tab" data-tab="search">חיפוש</button>
<button class="tab" data-tab="browse">פסקי דין</button>
</div>
<!-- Upload Panel -->
<div class="panel active" id="panel-upload">
<div class="card">
<h3>העלאת פסק דין</h3>
<div class="upload-area" id="drop-zone">
<p>גרור קובץ לכאן או לחץ לבחירה</p>
<p style="font-size:0.85em;color:#94a3b8;margin-top:8px">PDF, DOCX, RTF, TXT (עד 50MB)</p>
<input type="file" id="file-input" accept=".pdf,.docx,.rtf,.txt">
</div>
<div id="upload-form" style="display:none; margin-top:16px">
<div id="uploaded-file-info" style="margin-bottom:12px;padding:8px;background:#f0fdf4;border-radius:6px"></div>
<div class="form-grid">
<div class="form-group full">
<label>כותרת</label>
<input type="text" id="inp-title" placeholder="כותרת תיאורית לפסק הדין">
</div>
<div class="form-group">
<label>מספר תיק</label>
<input type="text" id="inp-case-number" placeholder="בל 12345-06-20">
</div>
<div class="form-group">
<label>בית משפט</label>
<input type="text" id="inp-court" placeholder="בית הדין האזורי לעבודה ת"א">
</div>
<div class="form-group">
<label>תאריך פסק דין</label>
<input type="date" id="inp-date">
</div>
<div class="form-group">
<label>שופט/ת</label>
<input type="text" id="inp-judge">
</div>
<div class="form-group">
<label>תובע/מערער</label>
<input type="text" id="inp-appellant">
</div>
<div class="form-group">
<label>נתבע/משיב</label>
<input type="text" id="inp-respondent" value="המוסד לביטוח לאומי">
</div>
<div class="form-group">
<label>נושאים (מופרדים בפסיקים)</label>
<input type="text" id="inp-topics" placeholder="נכות כללית, תאונת עבודה">
</div>
<div class="form-group">
<label>תוצאה</label>
<select id="inp-outcome">
<option value="">לא צוין</option>
<option value="accepted">התקבלה</option>
<option value="rejected">נדחתה</option>
<option value="partial">התקבלה חלקית</option>
<option value="remanded">הוחזרה לדיון</option>
</select>
</div>
</div>
<div style="margin-top:16px;display:flex;gap:8px">
<button class="btn btn-primary" id="btn-process">העלה ועבד</button>
<button class="btn" id="btn-cancel" style="background:#e2e8f0">ביטול</button>
</div>
<div id="progress-area" style="display:none;margin-top:12px">
<div class="progress-bar"><div class="fill" style="width:100%"></div></div>
<p id="progress-text" style="font-size:0.85em;color:#64748b;margin-top:4px">מעבד...</p>
</div>
</div>
</div>
</div>
<!-- Search Panel -->
<div class="panel" id="panel-search">
<div class="card">
<h3>חיפוש סמנטי</h3>
<div style="display:flex;gap:8px;margin-bottom:12px">
<input type="text" id="search-query" placeholder="הזן שאילתת חיפוש..." style="flex:1">
<button class="btn btn-primary" id="btn-search">חפש</button>
</div>
<div class="form-grid" style="margin-bottom:12px">
<div class="form-group">
<label>בית משפט</label>
<input type="text" id="search-court" placeholder="סינון לפי בית משפט">
</div>
<div class="form-group">
<label>נושא</label>
<input type="text" id="search-topic" placeholder="סינון לפי נושא">
</div>
<div class="form-group">
<label>תוצאה</label>
<select id="search-outcome">
<option value="">הכל</option>
<option value="accepted">התקבלה</option>
<option value="rejected">נדחתה</option>
<option value="partial">חלקית</option>
<option value="remanded">הוחזרה</option>
</select>
</div>
</div>
<div id="search-results">
<div class="empty-state">הזן שאילתה לחיפוש בפסקי הדין</div>
</div>
</div>
</div>
<!-- Browse Panel -->
<div class="panel" id="panel-browse">
<div class="card">
<h3>רשימת פסקי דין</h3>
<div id="decisions-list">
<div class="empty-state">טוען...</div>
</div>
</div>
</div>
</div>
<script>
// ── State ──
let uploadedFilename = null;
// ── Tabs ──
document.querySelectorAll('.tab').forEach(tab => {
tab.addEventListener('click', () => {
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
document.querySelectorAll('.panel').forEach(p => p.classList.remove('active'));
tab.classList.add('active');
document.getElementById('panel-' + tab.dataset.tab).classList.add('active');
if (tab.dataset.tab === 'browse') loadDecisions();
});
});
// ── Stats ──
async function loadStats() {
try {
const res = await fetch('/api/stats');
const data = await res.json();
document.getElementById('stat-decisions').textContent = data.total_decisions || 0;
document.getElementById('stat-chunks').textContent = data.total_chunks || 0;
document.getElementById('stat-completed').textContent = data.completed_decisions || 0;
} catch(e) { console.error('Failed to load stats', e); }
}
loadStats();
// ── Upload ──
const dropZone = document.getElementById('drop-zone');
const fileInput = document.getElementById('file-input');
dropZone.addEventListener('click', () => fileInput.click());
dropZone.addEventListener('dragover', e => { e.preventDefault(); dropZone.classList.add('dragover'); });
dropZone.addEventListener('dragleave', () => dropZone.classList.remove('dragover'));
dropZone.addEventListener('drop', e => {
e.preventDefault(); dropZone.classList.remove('dragover');
if (e.dataTransfer.files.length) handleFile(e.dataTransfer.files[0]);
});
fileInput.addEventListener('change', () => { if (fileInput.files.length) handleFile(fileInput.files[0]); });
async function handleFile(file) {
const formData = new FormData();
formData.append('file', file);
try {
const res = await fetch('/api/upload', { method: 'POST', body: formData });
if (!res.ok) throw new Error((await res.json()).detail);
const data = await res.json();
uploadedFilename = data.filename;
document.getElementById('uploaded-file-info').innerHTML =
`<strong>${data.original_name}</strong> (${(data.size/1024).toFixed(1)} KB)`;
document.getElementById('upload-form').style.display = 'block';
dropZone.style.display = 'none';
} catch(e) { alert('שגיאה בהעלאה: ' + e.message); }
}
document.getElementById('btn-cancel').addEventListener('click', () => {
document.getElementById('upload-form').style.display = 'none';
dropZone.style.display = 'block';
uploadedFilename = null;
});
document.getElementById('btn-process').addEventListener('click', async () => {
if (!uploadedFilename) return;
const btn = document.getElementById('btn-process');
btn.disabled = true;
const topics = document.getElementById('inp-topics').value
.split(',').map(t => t.trim()).filter(Boolean);
const body = {
filename: uploadedFilename,
title: document.getElementById('inp-title').value,
case_number: document.getElementById('inp-case-number').value,
court: document.getElementById('inp-court').value,
decision_date: document.getElementById('inp-date').value,
judge: document.getElementById('inp-judge').value,
parties_appellant: document.getElementById('inp-appellant').value,
parties_respondent: document.getElementById('inp-respondent').value,
topics: topics,
outcome: document.getElementById('inp-outcome').value,
};
try {
const res = await fetch('/api/decisions', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify(body),
});
if (!res.ok) throw new Error((await res.json()).detail);
const data = await res.json();
document.getElementById('progress-area').style.display = 'block';
// Listen to SSE progress
const evtSource = new EventSource('/api/progress/' + data.task_id);
evtSource.onmessage = (e) => {
const prog = JSON.parse(e.data);
document.getElementById('progress-text').textContent =
prog.status === 'completed' ? 'הושלם!' :
prog.status === 'failed' ? 'שגיאה: ' + (prog.error || '') :
'מעבד... (' + prog.status + ')';
if (prog.status === 'completed' || prog.status === 'failed') {
evtSource.close();
btn.disabled = false;
loadStats();
if (prog.status === 'completed') {
setTimeout(() => {
document.getElementById('upload-form').style.display = 'none';
document.getElementById('progress-area').style.display = 'none';
dropZone.style.display = 'block';
uploadedFilename = null;
// Reset form
document.querySelectorAll('#upload-form input[type=text], #upload-form input[type=date]').forEach(i => i.value = '');
document.getElementById('inp-respondent').value = 'המוסד לביטוח לאומי';
document.getElementById('inp-outcome').value = '';
}, 2000);
}
}
};
} catch(e) {
alert('שגיאה: ' + e.message);
btn.disabled = false;
}
});
// ── Search ──
document.getElementById('btn-search').addEventListener('click', doSearch);
document.getElementById('search-query').addEventListener('keypress', e => { if (e.key === 'Enter') doSearch(); });
async function doSearch() {
const q = document.getElementById('search-query').value.trim();
if (!q) return;
const params = new URLSearchParams({ q });
const court = document.getElementById('search-court').value;
const topic = document.getElementById('search-topic').value;
const outcome = document.getElementById('search-outcome').value;
if (court) params.append('court', court);
if (topic) params.append('topic', topic);
if (outcome) params.append('outcome', outcome);
const container = document.getElementById('search-results');
container.innerHTML = '<div class="empty-state">מחפש...</div>';
try {
const res = await fetch('/api/search?' + params);
if (!res.ok) throw new Error((await res.json()).detail);
const results = await res.json();
if (!results.length) {
container.innerHTML = '<div class="empty-state">לא נמצאו תוצאות</div>';
return;
}
container.innerHTML = results.map((r, i) => `
<div class="result-item">
<span class="result-score">${(r.score * 100).toFixed(1)}%</span>
<strong>${r.title || 'ללא כותרת'}</strong>
<div class="result-meta">
${r.case_number ? r.case_number + ' | ' : ''}
${r.court || ''}
${r.decision_date ? ' | ' + r.decision_date : ''}
${r.judge ? ' | שופט: ' + r.judge : ''}
${r.outcome ? ' | ' + outcomeHeb(r.outcome) : ''}
</div>
<div class="result-content">${escapeHtml(r.content || '').substring(0, 500)}</div>
</div>
`).join('');
} catch(e) {
container.innerHTML = `<div class="empty-state">שגיאה: ${e.message}</div>`;
}
}
// ── Browse ──
async function loadDecisions() {
const container = document.getElementById('decisions-list');
try {
const res = await fetch('/api/decisions');
const decisions = await res.json();
if (!decisions.length) {
container.innerHTML = '<div class="empty-state">אין פסקי דין במערכת</div>';
return;
}
container.innerHTML = `
<div class="decision-row decision-header">
<span>כותרת</span><span>בית משפט</span><span>תאריך</span><span>תוצאה</span><span></span>
</div>
${decisions.map(d => `
<div class="decision-row">
<span>
<strong>${d.title || 'ללא כותרת'}</strong>
${d.case_number ? '<br><small style="color:#64748b">' + d.case_number + '</small>' : ''}
${(d.topics || []).map(t => '<span class="tag">' + t + '</span>').join('')}
</span>
<span>${d.court || '-'}</span>
<span>${d.decision_date || '-'}</span>
<span>${d.outcome ? outcomeHeb(d.outcome) : '-'}</span>
<span>
<span class="status-badge status-${d.extraction_status || 'pending'}">${d.extraction_status || 'pending'}</span>
</span>
</div>
`).join('')}
`;
} catch(e) {
container.innerHTML = `<div class="empty-state">שגיאה: ${e.message}</div>`;
}
}
// ── Helpers ──
function outcomeHeb(o) {
return { accepted: 'התקבלה', rejected: 'נדחתה', partial: 'חלקית', remanded: 'הוחזרה' }[o] || o;
}
function escapeHtml(s) {
const d = document.createElement('div'); d.textContent = s; return d.innerHTML;
}
</script>
</body>
</html>