Initial commit: din-leumi MCP server + web app
MCP server with 7 tools for cataloging and searching National Insurance court decisions with pgvector semantic search. Web interface for upload, search, and browse.
This commit is contained in:
289
web/app.py
Normal file
289
web/app.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""Din Leumi — Web interface for uploading and searching court decisions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
# Allow importing din_leumi from the MCP server source
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from fastapi import FastAPI, File, HTTPException, UploadFile
|
||||
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
|
||||
from din_leumi import config
|
||||
from din_leumi.services import db, processor
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Staging area for raw uploads, located under the configured data directory.
UPLOAD_DIR = config.DATA_DIR / "uploads"

# File extensions (lower-case, including the dot) accepted by the upload endpoint.
ALLOWED_EXTENSIONS = {
    ".pdf",
    ".docx",
    ".rtf",
    ".txt",
}

# Upper bound on a single upload, in bytes (50 MiB).
MAX_FILE_SIZE = 50 * 1024 ** 2

# Processing progress per task id; kept in process memory only.
_progress: dict[str, dict] = {}
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown work around the application's lifetime.

    Before the application starts serving, the uploads and decisions
    directories are created if missing and ``db.init_schema()`` is awaited.
    After the application stops, ``db.close_pool()`` is awaited.
    """
    for directory in (UPLOAD_DIR, config.DECISIONS_DIR):
        directory.mkdir(parents=True, exist_ok=True)
    await db.init_schema()

    yield

    await db.close_pool()
|
||||
|
||||
|
||||
# The ASGI application; startup and shutdown are handled by ``lifespan``.
app = FastAPI(
    lifespan=lifespan,
    title="Din Leumi — דין לאומי",
)

# Directory next to this module from which ``index.html`` is served.
STATIC_DIR = Path(__file__).parent / "static"
|
||||
|
||||
|
||||
# ── Health ─────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/health")
async def health():
    """Health probe: unconditionally report an ``ok`` status."""
    payload = {"status": "ok"}
    return payload
|
||||
|
||||
|
||||
# ── Pages ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/")
async def index():
    """Serve the front-end entry page, ``index.html`` from the static directory."""
    page = STATIC_DIR / "index.html"
    return FileResponse(page)
|
||||
|
||||
|
||||
# ── Upload API ─────────────────────────────────────────────────────
|
||||
|
||||
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Store an uploaded document in the temporary uploads directory.

    The stored name is ``<unix timestamp>_<sanitised stem><extension>``.

    Returns:
        A dict with the stored ``filename``, the client-supplied
        ``original_name`` and the ``size`` in bytes.

    Raises:
        HTTPException: 400 when no filename is supplied, the extension is not
            in ``ALLOWED_EXTENSIONS``, or the file exceeds ``MAX_FILE_SIZE``.
    """
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    original = Path(file.filename)
    ext = original.suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}")

    # Keep only word characters, Hebrew letters, whitespace and ". - ( )".
    # Stripping afterwards makes a whitespace-only stem fall back to the
    # default name instead of producing a blank-looking file name.
    safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", original.stem).strip()
    if not safe_name:
        safe_name = "document"

    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    content = await file.read(MAX_FILE_SIZE + 1)
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"הקובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    filename = f"{int(time.time())}_{safe_name}{ext}"
    dest = UPLOAD_DIR / filename
    dest.write_bytes(content)

    return {
        "filename": filename,
        "original_name": file.filename,
        "size": len(content),
    }
|
||||
|
||||
|
||||
# ── Decision API ───────────────────────────────────────────────────
|
||||
|
||||
class DecisionCreateRequest(BaseModel):
    """Request body for ``POST /api/decisions``.

    Only ``filename`` is required; every other field has a default.
    """

    # Name of a file inside the uploads directory (as returned by the upload endpoint).
    filename: str
    # Display title; when empty, the handler derives one from the file name.
    title: str = ""
    court: str = ""
    # ISO-8601 date string (parsed with ``date.fromisoformat``); empty means "no date".
    decision_date: str = ""
    case_number: str = ""
    judge: str = ""
    parties_appellant: str = ""
    # Respondent defaults to the Hebrew name of the National Insurance Institute.
    parties_respondent: str = "המוסד לביטוח לאומי"
    # An empty list is forwarded to the database layer as ``None``.
    topics: list[str] = []
    outcome: str = ""
|
||||
|
||||
|
||||
# Strong references to in-flight background tasks. The event loop keeps only
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_background_tasks: set[asyncio.Task] = set()


@app.post("/api/decisions")
async def create_decision(req: DecisionCreateRequest):
    """Create a decision record from an uploaded file and start processing it.

    The uploaded file is copied into the decisions directory, a database
    record is created, and processing is started as a background task whose
    progress can be followed through the progress endpoint.

    Returns:
        A dict with the background ``task_id`` and the new ``decision_id``.

    Raises:
        HTTPException: 404 when ``req.filename`` is not a regular file located
            directly in the uploads directory; 400 when ``req.decision_date``
            is not a valid ISO date.
    """
    source = UPLOAD_DIR / req.filename
    # ``is_file`` (rather than ``exists``) also rejects directories, which
    # would otherwise make the copy below fail with an unhandled error.
    # The parent check rejects names that escape the uploads directory.
    if not source.is_file() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "קובץ לא נמצא")

    # Validate the date before touching the filesystem so that a bad request
    # does not leave an orphaned copy in the decisions directory.
    d_date = None
    if req.decision_date:
        try:
            d_date = date.fromisoformat(req.decision_date)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {req.decision_date}") from None

    # Copy to the decisions directory under the name without the upload
    # timestamp prefix; on a name clash, append a timestamp to the stem.
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = config.DECISIONS_DIR / original_name
    if dest.exists():
        dest = config.DECISIONS_DIR / f"{dest.stem}_{int(time.time())}{dest.suffix}"
    shutil.copy2(str(source), str(dest))

    title = req.title or original_name.rsplit(".", 1)[0]

    # Create DB record
    decision = await db.create_decision(
        title=title,
        file_path=str(dest),
        court=req.court,
        decision_date=d_date,
        case_number=req.case_number,
        judge=req.judge,
        parties_appellant=req.parties_appellant,
        parties_respondent=req.parties_respondent,
        topics=req.topics if req.topics else None,
        outcome=req.outcome,
    )

    task_id = str(uuid4())
    _progress[task_id] = {"status": "queued", "filename": req.filename}

    # Process in the background, holding a reference until the task is done.
    task = asyncio.create_task(_process_decision(task_id, decision, source))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)

    return {"task_id": task_id, "decision_id": decision["id"]}
|
||||
|
||||
|
||||
@app.get("/api/decisions")
async def list_decisions(
    court: str = "",
    topic: str = "",
    judge: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 50,
):
    """List decisions, optionally filtered.

    ``date_from`` and ``date_to`` are ISO date strings; an empty string means
    no bound. All filters are forwarded to ``db.list_decisions``.

    Raises:
        HTTPException: 400 when ``date_from`` or ``date_to`` is not a valid
            ISO date (previously this surfaced as an unhandled ``ValueError``).
    """

    def _parse(value: str) -> date | None:
        # Empty string means "no bound"; anything else must be an ISO date.
        if not value:
            return None
        try:
            return date.fromisoformat(value)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {value}") from None

    d_from = _parse(date_from)
    d_to = _parse(date_to)

    decisions = await db.list_decisions(
        court=court, topic=topic, judge=judge,
        date_from=d_from, date_to=d_to,
        outcome=outcome, limit=limit,
    )
    return decisions
|
||||
|
||||
|
||||
@app.get("/api/decisions/{decision_id}")
async def get_decision(decision_id: str):
    """Return a single decision by its UUID.

    Raises:
        HTTPException: 400 when ``decision_id`` is not a valid UUID (previously
            an unhandled ``ValueError``); 404 when no such decision exists.
    """
    try:
        decision_uuid = UUID(decision_id)
    except ValueError:
        raise HTTPException(400, "מזהה פסק דין לא תקין") from None

    decision = await db.get_decision(decision_uuid)
    if not decision:
        raise HTTPException(404, "פסק דין לא נמצא")
    return decision
|
||||
|
||||
|
||||
@app.delete("/api/decisions/{decision_id}")
async def delete_decision(decision_id: str):
    """Delete a decision by its UUID.

    Returns:
        ``{"deleted": decision_id}`` on success.

    Raises:
        HTTPException: 400 when ``decision_id`` is not a valid UUID (previously
            an unhandled ``ValueError``); 404 when nothing was deleted.
    """
    try:
        decision_uuid = UUID(decision_id)
    except ValueError:
        raise HTTPException(400, "מזהה פסק דין לא תקין") from None

    deleted = await db.delete_decision(decision_uuid)
    if not deleted:
        raise HTTPException(404, "פסק דין לא נמצא")
    return {"deleted": decision_id}
|
||||
|
||||
|
||||
# ── Search API ─────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/api/search")
async def search_decisions(
    q: str = "",
    court: str = "",
    topic: str = "",
    date_from: str = "",
    date_to: str = "",
    outcome: str = "",
    limit: int = 10,
):
    """Semantic search across decisions.

    The query text is embedded and passed, together with the optional
    filters, to ``db.search_similar``. Any ``decision_date`` in the results
    is converted to a string before returning.

    Raises:
        HTTPException: 400 when ``q`` is blank, or when ``date_from`` /
            ``date_to`` is not a valid ISO date (previously an unhandled
            ``ValueError``).
    """
    if not q.strip():
        raise HTTPException(400, "נדרש טקסט לחיפוש")

    def _parse(value: str) -> date | None:
        # Empty string means "no bound"; anything else must be an ISO date.
        if not value:
            return None
        try:
            return date.fromisoformat(value)
        except ValueError:
            raise HTTPException(400, f"פורמט תאריך לא תקין: {value}") from None

    # Validate the cheap inputs first so a bad request is rejected before
    # the embedding call is made.
    d_from = _parse(date_from)
    d_to = _parse(date_to)

    from din_leumi.services import embeddings

    query_emb = await embeddings.embed_query(q)

    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        court=court,
        topic=topic,
        date_from=d_from,
        date_to=d_to,
        outcome=outcome,
    )

    # Serialize dates
    for r in results:
        if r.get("decision_date"):
            r["decision_date"] = str(r["decision_date"])

    return results
|
||||
|
||||
|
||||
# ── Progress SSE ───────────────────────────────────────────────────
|
||||
|
||||
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
    """Stream processing progress for ``task_id`` as server-sent events.

    The current progress entry is emitted once per second until its status
    is ``completed`` or ``failed``. The stream then lingers for 30 seconds
    before the entry is removed from the in-memory table.

    Raises:
        HTTPException: 404 when ``task_id`` is unknown.
    """
    if task_id not in _progress:
        raise HTTPException(404, "Task not found")

    async def event_stream():
        finished = False
        try:
            while True:
                data = _progress.get(task_id)
                if data is None:
                    # The entry was removed (e.g. by another stream for the
                    # same task); stop instead of emitting "{}" forever.
                    break
                yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                if data.get("status") in ("completed", "failed"):
                    finished = True
                    break
                await asyncio.sleep(1)
            if finished:
                await asyncio.sleep(30)
        finally:
            # Runs even when the client disconnects during the linger period,
            # so finished tasks do not accumulate in memory. Unfinished tasks
            # are kept so a client can reconnect.
            if finished:
                _progress.pop(task_id, None)

    return StreamingResponse(event_stream(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.get("/api/stats")
async def stats():
    """Return whatever ``db.get_stats()`` reports."""
    summary = await db.get_stats()
    return summary
|
||||
|
||||
|
||||
# ── Background Processing ─────────────────────────────────────────
|
||||
|
||||
async def _process_decision(task_id: str, decision: dict, source: Path):
    """Run processing for one decision and record its progress.

    The progress entry for ``task_id`` is set to ``processing``, the processor
    is awaited for the decision's id, the uploaded ``source`` file is removed,
    and the entry is replaced with the outcome. Any exception is logged and
    recorded as a ``failed`` entry instead of propagating.
    """
    filename = source.name
    try:
        _progress[task_id] = {"status": "processing", "filename": filename}

        decision_uuid = UUID(decision["id"])
        result = await processor.process_decision(decision_uuid)

        # The staged upload is no longer needed once processing has returned.
        source.unlink(missing_ok=True)

        outcome = {
            "status": result.get("status", "completed"),
            "filename": filename,
            "decision_id": decision["id"],
            "result": result,
        }
        _progress[task_id] = outcome
    except Exception as exc:
        logger.exception("Processing failed for %s", filename)
        failure = {
            "status": "failed",
            "error": str(exc),
            "filename": filename,
        }
        _progress[task_id] = failure
|
||||
Reference in New Issue
Block a user