Files
din-leumi/web/app.py
Chaim 5c1fdd643f Initial commit: din-leumi MCP server + web app
MCP server with 7 tools for cataloging and searching
National Insurance court decisions with pgvector semantic search.
Web interface for upload, search, and browse.
2026-03-25 15:49:03 +00:00

290 lines
9.0 KiB
Python

"""Din Leumi — Web interface for uploading and searching court decisions."""
from __future__ import annotations
import asyncio
import json
import logging
import re
import shutil
import sys
import time
from contextlib import asynccontextmanager
from datetime import date
from pathlib import Path
from uuid import UUID, uuid4
# Allow importing din_leumi from the MCP server source
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from din_leumi import config
from din_leumi.services import db, processor
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Staging directory for raw uploads; created at startup by lifespan().
UPLOAD_DIR = config.DATA_DIR / "uploads"
# Lower-cased file suffixes the upload endpoint accepts.
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".rtf", ".txt"}
# Upload size ceiling, in bytes.
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
# In-memory progress tracking: maps a task id to the latest status payload
# that the progress endpoint streams. It is a plain dict, so its contents do
# not survive a restart of this process.
_progress: dict[str, dict] = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Prepare storage and the database for the lifetime of the application.

    On startup the upload and decisions directories are created (if missing)
    and the database schema is initialised. On shutdown the database pool is
    closed.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True)
    await db.init_schema()
    try:
        yield
    finally:
        # Close the pool even when shutdown is triggered by an exception or
        # cancellation thrown into the generator at the yield point.
        await db.close_pool()
# The ASGI application; startup/shutdown work is delegated to lifespan().
app = FastAPI(title="Din Leumi — דין לאומי", lifespan=lifespan)
# Directory next to this file that holds the front-end assets (index.html).
STATIC_DIR = Path(__file__).parent / "static"
# ── Health ─────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    """Liveness probe: return a fixed ``ok`` payload without any other work."""
    payload = {"status": "ok"}
    return payload
# ── Pages ──────────────────────────────────────────────────────────
@app.get("/")
async def index():
    """Serve the front end's ``index.html`` from the static directory."""
    landing_page = STATIC_DIR.joinpath("index.html")
    return FileResponse(landing_page)
# ── Upload API ─────────────────────────────────────────────────────
def _store_upload(safe_name: str, ext: str, content: bytes) -> str:
    """Write *content* to a new file in UPLOAD_DIR and return its file name.

    The name has the form ``<unix-timestamp>_<safe_name><ext>``. The file is
    opened in exclusive-create mode so an upload can never overwrite another
    one that received the same name within the same second; on a collision
    the numeric prefix is incremented until a free name is found.
    """
    timestamp = int(time.time())
    while True:
        filename = f"{timestamp}_{safe_name}{ext}"
        try:
            with open(UPLOAD_DIR / filename, "xb") as fh:
                fh.write(content)
        except FileExistsError:
            timestamp += 1
        else:
            return filename


@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a file to the temporary uploads directory.

    Args:
        file: The multipart file part sent by the client.

    Returns:
        A dict with the stored ``filename`` (to be passed to the decision
        creation endpoint), the client's ``original_name`` and the ``size``
        in bytes.

    Raises:
        HTTPException: 400 when the file name is missing, the extension is
            not in ALLOWED_EXTENSIONS, or the file exceeds MAX_FILE_SIZE.
    """
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}")

    # Keep only word characters, Hebrew letters, whitespace and a few
    # punctuation marks from the stem (no path separators survive).
    safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem)
    # The whitelist's \s also admits tabs and newlines; collapse every
    # whitespace run to one space so control characters never reach the
    # file system, and treat a whitespace-only stem as empty.
    safe_name = re.sub(r"\s+", " ", safe_name).strip()
    if not safe_name:
        safe_name = "document"

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without buffering an arbitrarily large body in memory.
    content = await file.read(MAX_FILE_SIZE + 1)
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"הקובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    # Disk I/O is blocking; run it in a worker thread so the event loop
    # stays responsive while a large file is written.
    filename = await asyncio.to_thread(_store_upload, safe_name, ext, content)

    return {
        "filename": filename,
        "original_name": file.filename,
        "size": len(content),
    }
# ── Decision API ───────────────────────────────────────────────────
class DecisionCreateRequest(BaseModel):
    """Request body for creating a decision from a previously uploaded file.

    All metadata fields are optional strings; an empty string means
    "not provided".
    """

    # Name of a file inside UPLOAD_DIR; create_decision resolves it there.
    filename: str
    # Display title; when empty, create_decision falls back to the file name
    # without its extension.
    title: str = ""
    court: str = ""
    # Parsed with date.fromisoformat by create_decision; empty means no date.
    decision_date: str = ""
    case_number: str = ""
    judge: str = ""
    parties_appellant: str = ""
    # Default is the Hebrew name of the National Insurance Institute.
    parties_respondent: str = "המוסד לביטוח לאומי"
    # An empty list is forwarded to the database layer as None.
    topics: list[str] = []
    outcome: str = ""
# Strong references to in-flight processing tasks. The event loop keeps only
# weak references to tasks, so a task whose result is not stored anywhere may
# be garbage-collected before it finishes.
_background_tasks: set[asyncio.Task] = set()


@app.post("/api/decisions")
async def create_decision(req: DecisionCreateRequest):
    """Create a decision record and start processing it in the background.

    The uploaded file named by ``req.filename`` is copied into the decisions
    directory, a database record is created for it, and a background task is
    scheduled to process it.

    Args:
        req: Metadata for the decision plus the name of the uploaded file.

    Returns:
        A dict with ``task_id`` (for the progress stream) and ``decision_id``.

    Raises:
        HTTPException: 404 when the uploaded file does not exist directly
            inside UPLOAD_DIR; 400 when ``decision_date`` is not an ISO date.
    """
    source = UPLOAD_DIR / req.filename
    # is_file() (rather than exists()) also rejects directories; the parent
    # check rejects names that resolve outside the uploads directory.
    if not source.is_file() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "קובץ לא נמצא")

    # Validate the date before touching the file system so that a bad
    # request does not leave an orphaned copy in the decisions directory.
    d_date = None
    if req.decision_date:
        try:
            d_date = date.fromisoformat(req.decision_date)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {req.decision_date}") from None

    # Copy to the decisions directory under the name without the numeric
    # upload prefix; add a timestamp suffix if that name is already taken.
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = config.DECISIONS_DIR / original_name
    if dest.exists():
        stem = dest.stem
        dest = config.DECISIONS_DIR / f"{stem}_{int(time.time())}{dest.suffix}"
    # The copy is blocking disk I/O; keep it off the event loop.
    await asyncio.to_thread(shutil.copy2, str(source), str(dest))

    title = req.title or original_name.rsplit(".", 1)[0]

    # Create the DB record; remove the copy again if that fails so the
    # decisions directory does not accumulate files without a record.
    try:
        decision = await db.create_decision(
            title=title,
            file_path=str(dest),
            court=req.court,
            decision_date=d_date,
            case_number=req.case_number,
            judge=req.judge,
            parties_appellant=req.parties_appellant,
            parties_respondent=req.parties_respondent,
            topics=req.topics if req.topics else None,
            outcome=req.outcome,
        )
    except Exception:
        dest.unlink(missing_ok=True)
        raise

    task_id = str(uuid4())
    _progress[task_id] = {"status": "queued", "filename": req.filename}

    # Process in the background, holding a reference until the task is done.
    task = asyncio.create_task(_process_decision(task_id, decision, source))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"task_id": task_id, "decision_id": decision["id"]}
@app.get("/api/decisions")
async def list_decisions(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 50,
):
    """List decisions with optional filters.

    Args:
        court, topic, judge, outcome: Filter values forwarded unchanged to
            the database layer; empty strings are the defaults.
        date_from, date_to: Optional ISO dates (``YYYY-MM-DD``); empty means
            no bound.
        limit: Maximum number of rows requested from the database layer.

    Returns:
        Whatever ``db.list_decisions`` returns for the given filters.

    Raises:
        HTTPException: 400 when ``date_from`` or ``date_to`` is not a valid
            ISO date.
    """
    bounds = []
    for raw in (date_from, date_to):
        try:
            bounds.append(date.fromisoformat(raw) if raw else None)
        except ValueError:
            # Report a client error instead of letting the ValueError
            # surface as a 500.
            raise HTTPException(400, f"פורמט תאריך לא תקין: {raw}") from None
    d_from, d_to = bounds

    decisions = await db.list_decisions(
        court=court, topic=topic, judge=judge,
        date_from=d_from, date_to=d_to,
        outcome=outcome, limit=limit,
    )
    return decisions
@app.get("/api/decisions/{decision_id}")
async def get_decision(decision_id: str):
    """Get a single decision.

    Args:
        decision_id: The decision's UUID in string form.

    Returns:
        The decision as returned by ``db.get_decision``.

    Raises:
        HTTPException: 404 when the id is not a valid UUID or no decision
            with that id exists.
    """
    try:
        decision_uuid = UUID(decision_id)
    except ValueError:
        # A malformed id cannot match any record; answer 404 instead of
        # letting the ValueError surface as a 500.
        raise HTTPException(404, "פסק דין לא נמצא") from None

    decision = await db.get_decision(decision_uuid)
    if not decision:
        raise HTTPException(404, "פסק דין לא נמצא")
    return decision
@app.delete("/api/decisions/{decision_id}")
async def delete_decision(decision_id: str):
    """Delete a decision.

    Args:
        decision_id: The decision's UUID in string form.

    Returns:
        ``{"deleted": decision_id}`` when the database layer reports a
        deletion.

    Raises:
        HTTPException: 404 when the id is not a valid UUID or nothing was
            deleted.
    """
    try:
        decision_uuid = UUID(decision_id)
    except ValueError:
        # A malformed id cannot match any record; answer 404 instead of
        # letting the ValueError surface as a 500.
        raise HTTPException(404, "פסק דין לא נמצא") from None

    deleted = await db.delete_decision(decision_uuid)
    if not deleted:
        raise HTTPException(404, "פסק דין לא נמצא")
    return {"deleted": decision_id}
# ── Search API ─────────────────────────────────────────────────────
@app.get("/api/search")
async def search_decisions(
    q: str = "",
    court: str = "",
    topic: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 10,
):
    """Semantic search across decisions.

    Args:
        q: Free-text query; must contain non-whitespace characters.
        court, topic, outcome: Filter values forwarded unchanged to the
            database layer; empty strings are the defaults.
        date_from, date_to: Optional ISO dates (``YYYY-MM-DD``); empty means
            no bound.
        limit: Maximum number of results requested from the database layer.

    Returns:
        The rows returned by ``db.search_similar``, with any truthy
        ``decision_date`` converted to a string.

    Raises:
        HTTPException: 400 when ``q`` is blank or a date filter is not a
            valid ISO date.
    """
    if not q.strip():
        raise HTTPException(400, "נדרש טקסט לחיפוש")

    # Validate the cheap inputs first so a malformed date is rejected with a
    # 400 before any embedding work is done.
    bounds = []
    for raw in (date_from, date_to):
        try:
            bounds.append(date.fromisoformat(raw) if raw else None)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {raw}") from None
    d_from, d_to = bounds

    # Imported at call time, as in the original code, rather than at module
    # import.
    from din_leumi.services import embeddings

    query_emb = await embeddings.embed_query(q)

    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        court=court,
        topic=topic,
        date_from=d_from,
        date_to=d_to,
        outcome=outcome,
    )

    # Serialize dates
    for r in results:
        if r.get("decision_date"):
            r["decision_date"] = str(r["decision_date"])
    return results
# ── Progress SSE ───────────────────────────────────────────────────
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
    """SSE stream of processing progress.

    Emits the task's current progress payload once per second until its
    status is ``completed`` or ``failed``. The stream then stays open for a
    30-second grace period, after which the task's entry is removed from the
    in-memory progress table.

    Args:
        task_id: Identifier returned when the decision was created.

    Returns:
        A ``text/event-stream`` response.

    Raises:
        HTTPException: 404 when the task id is unknown.
    """
    if task_id not in _progress:
        raise HTTPException(404, "Task not found")

    async def event_stream():
        finished = False
        try:
            while True:
                data = _progress.get(task_id)
                if data is None:
                    # The entry was already cleaned up (e.g. by another
                    # stream for the same task); end the stream instead of
                    # emitting empty events forever.
                    return
                # Decide before yielding so that a disconnect right at the
                # final event still triggers the cleanup below.
                finished = data.get("status") in ("completed", "failed")
                yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                if finished:
                    break
                await asyncio.sleep(1)
            # Grace period before the entry is dropped.
            await asyncio.sleep(30)
        finally:
            # Runs on normal completion and when the client disconnects
            # (generator closed or cancelled), so a finished task's entry is
            # not left behind. Entries of unfinished tasks are kept because
            # the background job is still updating them.
            # NOTE(review): a finished task that nobody ever streams is still
            # never removed from _progress.
            if finished:
                _progress.pop(task_id, None)

    return StreamingResponse(event_stream(), media_type="text/event-stream")
@app.get("/api/stats")
async def stats():
    """Return the statistics reported by the database layer."""
    summary = await db.get_stats()
    return summary
# ── Background Processing ─────────────────────────────────────────
async def _process_decision(task_id: str, decision: dict, source: Path):
    """Run the processor for one decision and publish its progress.

    The task's entry in the progress table is set to ``processing``, the
    processor is invoked with the decision's id, the uploaded source file is
    removed, and the entry is replaced with the final state. Any exception is
    logged and recorded as a ``failed`` entry instead of propagating.

    Args:
        task_id: Key of this job in the progress table.
        decision: The created decision record; its ``"id"`` is read.
        source: Path of the uploaded file, removed after processing returns.
    """
    name = source.name
    try:
        _progress[task_id] = {"status": "processing", "filename": name}
        outcome = await processor.process_decision(UUID(decision["id"]))
        # The staged upload is no longer needed once the processor returned.
        source.unlink(missing_ok=True)
        final_state = {
            "status": outcome.get("status", "completed"),
            "filename": name,
            "decision_id": decision["id"],
            "result": outcome,
        }
        _progress[task_id] = final_state
    except Exception as exc:
        logger.exception("Processing failed for %s", name)
        failure_state = {
            "status": "failed",
            "error": str(exc),
            "filename": name,
        }
        _progress[task_id] = failure_state