"""Din Leumi — Web interface for uploading and searching court decisions.""" from __future__ import annotations import asyncio import json import logging import re import shutil import sys import time from contextlib import asynccontextmanager from datetime import date from pathlib import Path from uuid import UUID, uuid4 # Allow importing din_leumi from the MCP server source sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src")) from fastapi import FastAPI, File, HTTPException, UploadFile from fastapi.responses import FileResponse, JSONResponse, StreamingResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel from din_leumi import config from din_leumi.services import db, processor logger = logging.getLogger(__name__) UPLOAD_DIR = config.DATA_DIR / "uploads" ALLOWED_EXTENSIONS = {".pdf", ".docx", ".rtf", ".txt"} MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB # In-memory progress tracking _progress: dict[str, dict] = {} @asynccontextmanager async def lifespan(app: FastAPI): UPLOAD_DIR.mkdir(parents=True, exist_ok=True) config.DECISIONS_DIR.mkdir(parents=True, exist_ok=True) await db.init_schema() yield await db.close_pool() app = FastAPI(title="Din Leumi — דין לאומי", lifespan=lifespan) STATIC_DIR = Path(__file__).parent / "static" # ── Health ───────────────────────────────────────────────────────── @app.get("/health") async def health(): return {"status": "ok"} # ── Pages ────────────────────────────────────────────────────────── @app.get("/") async def index(): return FileResponse(STATIC_DIR / "index.html") # ── Upload API ───────────────────────────────────────────────────── @app.post("/api/upload") async def upload_file(file: UploadFile = File(...)): """Upload a file to the temporary uploads directory.""" if not file.filename: raise HTTPException(400, "No filename provided") ext = Path(file.filename).suffix.lower() if ext not in ALLOWED_EXTENSIONS: raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}") safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem) if not safe_name: safe_name = "document" timestamp = int(time.time()) filename = f"{timestamp}_{safe_name}{ext}" content = await file.read() if len(content) > MAX_FILE_SIZE: raise HTTPException(400, f"הקובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB") dest = UPLOAD_DIR / filename dest.write_bytes(content) return { "filename": filename, "original_name": file.filename, "size": len(content), } # ── Decision API ─────────────────────────────────────────────────── class DecisionCreateRequest(BaseModel): filename: str title: str = "" court: str = "" decision_date: str = "" case_number: str = "" judge: str = "" parties_appellant: str = "" parties_respondent: str = "המוסד לביטוח לאומי" topics: list[str] = [] outcome: str = "" @app.post("/api/decisions") async def create_decision(req: DecisionCreateRequest): """Create a decision record and start processing.""" source = UPLOAD_DIR / req.filename if not source.exists() or not source.parent.samefile(UPLOAD_DIR): raise HTTPException(404, "קובץ לא נמצא") # Copy to decisions directory original_name = re.sub(r"^\d+_", "", source.name) dest = config.DECISIONS_DIR / original_name if dest.exists(): stem = dest.stem dest = config.DECISIONS_DIR / f"{stem}_{int(time.time())}{dest.suffix}" shutil.copy2(str(source), str(dest)) # Parse date d_date = None if req.decision_date: try: d_date = date.fromisoformat(req.decision_date) except ValueError: raise HTTPException(400, f"פורמט תאריך לא תקין: {req.decision_date}") title = req.title or original_name.rsplit(".", 1)[0] # Create DB record decision = await db.create_decision( title=title, file_path=str(dest), court=req.court, decision_date=d_date, case_number=req.case_number, judge=req.judge, parties_appellant=req.parties_appellant, parties_respondent=req.parties_respondent, topics=req.topics if req.topics else None, outcome=req.outcome, ) task_id = str(uuid4()) _progress[task_id] = {"status": "queued", "filename": req.filename} # Process in background asyncio.create_task(_process_decision(task_id, decision, source)) return {"task_id": task_id, "decision_id": decision["id"]} @app.get("/api/decisions") async def list_decisions( court: str = "", topic: str = "", judge: str = "", date_from: str = "", date_to: str = "", outcome: str = "", limit: int = 50, ): """List decisions with optional filters.""" d_from = date.fromisoformat(date_from) if date_from else None d_to = date.fromisoformat(date_to) if date_to else None decisions = await db.list_decisions( court=court, topic=topic, judge=judge, date_from=d_from, date_to=d_to, outcome=outcome, limit=limit, ) return decisions @app.get("/api/decisions/{decision_id}") async def get_decision(decision_id: str): """Get a single decision.""" decision = await db.get_decision(UUID(decision_id)) if not decision: raise HTTPException(404, "פסק דין לא נמצא") return decision @app.delete("/api/decisions/{decision_id}") async def delete_decision(decision_id: str): """Delete a decision.""" deleted = await db.delete_decision(UUID(decision_id)) if not deleted: raise HTTPException(404, "פסק דין לא נמצא") return {"deleted": decision_id} # ── Search API ───────────────────────────────────────────────────── @app.get("/api/search") async def search_decisions( q: str = "", court: str = "", topic: str = "", date_from: str = "", date_to: str = "", outcome: str = "", limit: int = 10, ): """Semantic search across decisions.""" if not q.strip(): raise HTTPException(400, "נדרש טקסט לחיפוש") from din_leumi.services import embeddings query_emb = await embeddings.embed_query(q) d_from = date.fromisoformat(date_from) if date_from else None d_to = date.fromisoformat(date_to) if date_to else None results = await db.search_similar( query_embedding=query_emb, limit=limit, court=court, topic=topic, date_from=d_from, date_to=d_to, outcome=outcome, ) # Serialize dates for r in results: if r.get("decision_date"): r["decision_date"] = str(r["decision_date"]) return results # ── Progress SSE ─────────────────────────────────────────────────── @app.get("/api/progress/{task_id}") async def progress_stream(task_id: str): """SSE stream of processing progress.""" if task_id not in _progress: raise HTTPException(404, "Task not found") async def event_stream(): while True: data = _progress.get(task_id, {}) yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" if data.get("status") in ("completed", "failed"): break await asyncio.sleep(1) await asyncio.sleep(30) _progress.pop(task_id, None) return StreamingResponse(event_stream(), media_type="text/event-stream") @app.get("/api/stats") async def stats(): """System statistics.""" return await db.get_stats() # ── Background Processing ───────────────────────────────────────── async def _process_decision(task_id: str, decision: dict, source: Path): """Process a decision in the background.""" try: _progress[task_id] = {"status": "processing", "filename": source.name} result = await processor.process_decision(UUID(decision["id"])) source.unlink(missing_ok=True) _progress[task_id] = { "status": result.get("status", "completed"), "filename": source.name, "decision_id": decision["id"], "result": result, } except Exception as e: logger.exception("Processing failed for %s", source.name) _progress[task_id] = { "status": "failed", "error": str(e), "filename": source.name, }