MCP server with 7 tools for cataloging and searching National Insurance court decisions with pgvector semantic search. Web interface for upload, search, and browse.
290 lines
9.0 KiB
Python
290 lines
9.0 KiB
Python
"""Din Leumi — Web interface for uploading and searching court decisions."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import re
|
|
import shutil
|
|
import sys
|
|
import time
|
|
from contextlib import asynccontextmanager
|
|
from datetime import date
|
|
from pathlib import Path
|
|
from uuid import UUID, uuid4
|
|
|
|
# Allow importing din_leumi from the MCP server source
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))
|
|
|
|
from fastapi import FastAPI, File, HTTPException, UploadFile
|
|
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel
|
|
|
|
from din_leumi import config
|
|
from din_leumi.services import db, processor
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Staging directory for raw uploads, located under the configured data directory.
UPLOAD_DIR = config.DATA_DIR.joinpath("uploads")
# Lower-cased file suffixes that the upload endpoint accepts.
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".rtf", ".txt"}
# Largest accepted upload, in bytes.
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB

# In-memory progress tracking: task id -> most recent status payload.
_progress: dict[str, dict] = {}
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Set up storage and the database for the lifetime of the application.

    Startup: creates the upload and decisions directories when missing and
    initializes the database schema. Shutdown: closes the database pool.

    Args:
        app: The FastAPI application being served (not used by this function).
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True)
    await db.init_schema()
    try:
        yield
    finally:
        # Runs even when an exception or cancellation is thrown in at the
        # yield point, so the pool is not left open on abnormal shutdown.
        await db.close_pool()
|
|
|
|
|
|
# ASGI application object; startup and shutdown work is handled by ``lifespan``.
app = FastAPI(title="Din Leumi — דין לאומי", lifespan=lifespan)

# Directory next to this module from which the index page is served.
STATIC_DIR = Path(__file__).parent.joinpath("static")
|
|
|
|
|
|
# ── Health ─────────────────────────────────────────────────────────
|
|
|
|
@app.get("/health")
async def health():
    """Liveness probe; always answers with an ``ok`` status."""
    payload = {"status": "ok"}
    return payload
|
|
|
|
|
|
# ── Pages ──────────────────────────────────────────────────────────
|
|
|
|
@app.get("/")
async def index():
    """Serve ``index.html`` from the static directory as the landing page."""
    landing_page = STATIC_DIR / "index.html"
    return FileResponse(landing_page)
|
|
|
|
|
|
# ── Upload API ─────────────────────────────────────────────────────
|
|
|
|
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a file to the temporary uploads directory.

    The file is stored as ``<unix timestamp>_<sanitized stem><extension>``.

    Args:
        file: The multipart file part sent by the client.

    Returns:
        A dict with the stored ``filename``, the client-supplied
        ``original_name`` and the ``size`` in bytes.

    Raises:
        HTTPException: 400 when the filename is missing, the extension is not
            in ``ALLOWED_EXTENSIONS``, or the content exceeds ``MAX_FILE_SIZE``.
    """
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}")

    # Keep only word characters, Hebrew letters, whitespace and ". - ( )".
    safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem)
    if not safe_name.strip():
        # Nothing usable survived sanitizing (empty or whitespace only).
        safe_name = "document"
    timestamp = int(time.time())
    filename = f"{timestamp}_{safe_name}{ext}"

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory.
    content = await file.read(MAX_FILE_SIZE + 1)
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"הקובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    dest = UPLOAD_DIR / filename
    dest.write_bytes(content)

    return {
        "filename": filename,
        "original_name": file.filename,
        "size": len(content),
    }
|
|
|
|
|
|
# ── Decision API ───────────────────────────────────────────────────
|
|
|
|
# Request body for POST /api/decisions.
# (Documented with comments rather than a class docstring so the generated
# schema description stays unchanged.)
class DecisionCreateRequest(BaseModel):
    # Name of a previously uploaded file, looked up inside UPLOAD_DIR.
    filename: str
    # Optional title; the handler falls back to the file name without extension.
    title: str = ""
    court: str = ""
    # ISO-format date string; an empty string means "no date".
    decision_date: str = ""
    case_number: str = ""
    judge: str = ""
    parties_appellant: str = ""
    # Defaults to the National Insurance Institute (in Hebrew).
    parties_respondent: str = "המוסד לביטוח לאומי"
    # An empty list is passed to the database layer as None.
    topics: list[str] = []
    outcome: str = ""
|
|
|
|
|
|
# Strong references to in-flight processing tasks. The event loop only keeps
# weak references to tasks, so a task nobody references may be
# garbage-collected before it finishes.
_background_tasks: set = set()


@app.post("/api/decisions")
async def create_decision(req: DecisionCreateRequest):
    """Create a decision record and start processing it in the background.

    Args:
        req: Metadata for the decision plus the name of an uploaded file.

    Returns:
        A dict with ``task_id`` (for the progress endpoint) and ``decision_id``.

    Raises:
        HTTPException: 404 when the uploaded file is not a regular file
            directly inside ``UPLOAD_DIR``; 400 when ``decision_date`` is not
            a valid ISO date.
    """
    source = UPLOAD_DIR / req.filename
    # The parent check rejects names that resolve outside the uploads directory.
    if not source.is_file() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "קובץ לא נמצא")

    # Validate the date before touching the filesystem so that a rejected
    # request does not leave an orphaned copy in the decisions directory.
    d_date = None
    if req.decision_date:
        try:
            d_date = date.fromisoformat(req.decision_date)
        except ValueError as exc:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {req.decision_date}") from exc

    # Copy to decisions directory, dropping the "<timestamp>_" upload prefix.
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = config.DECISIONS_DIR / original_name
    if dest.exists():
        # Avoid overwriting an existing decision file with the same name.
        dest = config.DECISIONS_DIR / f"{dest.stem}_{int(time.time())}{dest.suffix}"
    shutil.copy2(str(source), str(dest))

    title = req.title or original_name.rsplit(".", 1)[0]

    # Create DB record
    decision = await db.create_decision(
        title=title,
        file_path=str(dest),
        court=req.court,
        decision_date=d_date,
        case_number=req.case_number,
        judge=req.judge,
        parties_appellant=req.parties_appellant,
        parties_respondent=req.parties_respondent,
        topics=req.topics if req.topics else None,
        outcome=req.outcome,
    )

    task_id = str(uuid4())
    _progress[task_id] = {"status": "queued", "filename": req.filename}

    # Process in background; hold a reference until the task is done.
    task = asyncio.create_task(_process_decision(task_id, decision, source))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"task_id": task_id, "decision_id": decision["id"]}
|
|
|
|
|
|
@app.get("/api/decisions")
async def list_decisions(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 50,
):
    """List decisions with optional filters.

    Args:
        court, topic, judge, outcome: Filter values passed to the database layer.
        date_from, date_to: ISO-format date strings; empty means no bound.
        limit: Maximum number of rows requested from the database layer.

    Returns:
        Whatever ``db.list_decisions`` returns for the given filters.

    Raises:
        HTTPException: 400 when ``date_from`` or ``date_to`` is not a valid
            ISO date.
    """
    bounds = {}
    for key, raw in (("date_from", date_from), ("date_to", date_to)):
        try:
            bounds[key] = date.fromisoformat(raw) if raw else None
        except ValueError as exc:
            # Report bad input as a client error instead of an unhandled 500.
            raise HTTPException(400, f"פורמט תאריך לא תקין: {raw}") from exc

    decisions = await db.list_decisions(
        court=court, topic=topic, judge=judge,
        date_from=bounds["date_from"], date_to=bounds["date_to"],
        outcome=outcome, limit=limit,
    )
    return decisions
|
|
|
|
|
|
@app.get("/api/decisions/{decision_id}")
async def get_decision(decision_id: str):
    """Get a single decision.

    Args:
        decision_id: The decision's UUID in string form.

    Returns:
        The decision record returned by ``db.get_decision``.

    Raises:
        HTTPException: 404 when the id is not a valid UUID or no decision
            with that id exists.
    """
    try:
        key = UUID(decision_id)
    except ValueError as exc:
        # A malformed id cannot match any record; answer 404 rather than
        # letting the ValueError surface as a 500.
        raise HTTPException(404, "פסק דין לא נמצא") from exc

    decision = await db.get_decision(key)
    if not decision:
        raise HTTPException(404, "פסק דין לא נמצא")
    return decision
|
|
|
|
|
|
@app.delete("/api/decisions/{decision_id}")
async def delete_decision(decision_id: str):
    """Delete a decision.

    Args:
        decision_id: The decision's UUID in string form.

    Returns:
        ``{"deleted": decision_id}`` when the database layer reports a deletion.

    Raises:
        HTTPException: 404 when the id is not a valid UUID or nothing was
            deleted.
    """
    try:
        key = UUID(decision_id)
    except ValueError as exc:
        # A malformed id cannot match any record; answer 404 rather than
        # letting the ValueError surface as a 500.
        raise HTTPException(404, "פסק דין לא נמצא") from exc

    deleted = await db.delete_decision(key)
    if not deleted:
        raise HTTPException(404, "פסק דין לא נמצא")
    return {"deleted": decision_id}
|
|
|
|
|
|
# ── Search API ─────────────────────────────────────────────────────
|
|
|
|
@app.get("/api/search")
async def search_decisions(
    q: str = "",
    court: str = "",
    topic: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 10,
):
    """Semantic search across decisions.

    Args:
        q: Free-text query; must contain non-whitespace characters.
        court, topic, outcome: Filter values passed to the database layer.
        date_from, date_to: ISO-format date strings; empty means no bound.
        limit: Maximum number of results requested from the database layer.

    Returns:
        The results of ``db.search_similar``, with any truthy
        ``decision_date`` value converted to a string.

    Raises:
        HTTPException: 400 when ``q`` is blank or a date filter is not a
            valid ISO date.
    """
    if not q.strip():
        raise HTTPException(400, "נדרש טקסט לחיפוש")

    # Validate the date filters before computing the embedding so that bad
    # input is rejected as a client error without doing that work first.
    bounds = {}
    for key, raw in (("date_from", date_from), ("date_to", date_to)):
        try:
            bounds[key] = date.fromisoformat(raw) if raw else None
        except ValueError as exc:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {raw}") from exc

    from din_leumi.services import embeddings

    query_emb = await embeddings.embed_query(q)

    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        court=court,
        topic=topic,
        date_from=bounds["date_from"],
        date_to=bounds["date_to"],
        outcome=outcome,
    )

    # Serialize dates
    for r in results:
        if r.get("decision_date"):
            r["decision_date"] = str(r["decision_date"])

    return results
|
|
|
|
|
|
# ── Progress SSE ───────────────────────────────────────────────────
|
|
|
|
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
    """SSE stream of processing progress.

    Emits the task's current progress payload once per second as a
    ``data:`` event and ends after an event whose status is ``completed``
    or ``failed``. The progress entry is removed about 30 seconds after that
    final event has been prepared.

    Args:
        task_id: Identifier returned when the decision was created.

    Raises:
        HTTPException: 404 when no progress entry exists for ``task_id``.
    """
    if task_id not in _progress:
        raise HTTPException(404, "Task not found")

    async def event_stream():
        while True:
            data = _progress.get(task_id, {})
            finished = data.get("status") in ("completed", "failed")
            if finished:
                # Schedule the cleanup on the event loop *before* the final
                # yield. Code placed after the yield does not run when the
                # client disconnects and the generator is closed, which
                # would leave the entry in memory indefinitely.
                asyncio.get_running_loop().call_later(
                    30, _progress.pop, task_id, None
                )
            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
            if finished:
                break
            await asyncio.sleep(1)

    return StreamingResponse(event_stream(), media_type="text/event-stream")
|
|
|
|
|
|
@app.get("/api/stats")
async def stats():
    """Return the statistics reported by the database layer."""
    summary = await db.get_stats()
    return summary
|
|
|
|
|
|
# ── Background Processing ─────────────────────────────────────────
|
|
|
|
async def _process_decision(task_id: str, decision: dict, source: Path):
    """Run the processor for one decision and publish its progress.

    Marks the task as ``processing``, runs ``processor.process_decision`` for
    the decision's id, removes the uploaded source file, and records the
    outcome under ``task_id`` in ``_progress``. Any ``Exception`` is logged
    and recorded as a ``failed`` status instead of propagating.

    Args:
        task_id: Key in ``_progress`` under which status is published.
        decision: Decision record; its ``"id"`` value is parsed as a UUID.
        source: Path of the uploaded file to delete after processing.
    """
    upload_name = source.name
    try:
        _progress[task_id] = dict(status="processing", filename=upload_name)
        outcome = await processor.process_decision(UUID(decision["id"]))
        # The upload is no longer needed once the processor has returned.
        source.unlink(missing_ok=True)

        final_state = {
            "status": outcome.get("status", "completed"),
            "filename": upload_name,
            "decision_id": decision["id"],
            "result": outcome,
        }
        _progress[task_id] = final_state
    except Exception as e:
        logger.exception("Processing failed for %s", upload_name)
        failure_state = {
            "status": "failed",
            "error": str(e),
            "filename": upload_name,
        }
        _progress[task_id] = failure_state
|