Flatten cases directory structure and unify paths

- Remove cases/new|in-progress|completed subdivision (status managed in DB)
- Rename documents/original → documents/originals (consistent plural)
- Move exports from global data/exports/ into cases/{num}/exports/
- Add documents/research/ for case law and analysis files
- Update all agents, scripts, config, web API endpoints, and DB paths

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 14:33:27 +00:00
parent 4d674bf475
commit 22e819363e
17 changed files with 1203 additions and 62 deletions
--- a/web/app.py
+++ b/web/app.py
@@ -207,6 +207,10 @@ async def list_cases(detail: bool = False):
            doc_count = await conn.fetchval(
                "SELECT count(*) FROM documents WHERE case_id = $1", case_id
            )
+            processing_count = await conn.fetchval(
+                "SELECT count(*) FROM documents WHERE case_id = $1 AND extraction_status != 'completed'",
+                case_id,
+            )
            result.append({
                "case_number": c["case_number"],
                "title": c["title"],
@@ -215,6 +219,7 @@ async def list_cases(detail: bool = False):
                "committee_type": c.get("committee_type", ""),
                "hearing_date": str(c["hearing_date"]) if c.get("hearing_date") else "",
                "document_count": doc_count,
+                "processing_count": processing_count,
                "gitea_url": f"https://gitea.nautilus.marcusgroup.org/cases/{c['case_number']}",
            })
    return result
@@ -566,7 +571,7 @@ async def api_learn(case_number: str):
@app.get("/api/cases/{case_number}/exports")
 async def api_list_exports(case_number: str):
    """List all exported drafts and versions for a case."""
-    export_dir = config.EXPORTS_DIR / case_number
+    export_dir = config.find_case_dir(case_number) / "exports"
    if not export_dir.exists():
        return []
    files = []
@@ -585,7 +590,7 @@ async def api_list_exports(case_number: str):
@app.get("/api/cases/{case_number}/exports/{filename}/download")
 async def api_download_export(case_number: str, filename: str):
    """Download an exported file."""
-    export_dir = config.EXPORTS_DIR / case_number
+    export_dir = config.find_case_dir(case_number) / "exports"
    path = export_dir / filename
    if not path.exists() or not path.parent.samefile(export_dir):
        raise HTTPException(404, "קובץ לא נמצא")
@@ -614,7 +619,7 @@ async def api_upload_export(case_number: str, file: UploadFile = File(...)):
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"קובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

-    export_dir = config.EXPORTS_DIR / case_number
+    export_dir = config.find_case_dir(case_number) / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)

    # Version numbering for uploads
@@ -644,7 +649,7 @@ async def api_mark_final(case_number: str, filename: str):
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

-    export_dir = config.EXPORTS_DIR / case_number
+    export_dir = config.find_case_dir(case_number) / "exports"
    source = export_dir / filename
    if not source.exists() or not source.parent.samefile(export_dir):
        raise HTTPException(404, "קובץ לא נמצא")
@@ -1142,7 +1147,7 @@ async def api_upload_tagged_document(
        new_filename = generate_doc_filename(doc_type, case_number, party_name, ext)

    # Save to case directory
-    case_dir = config.find_case_dir(case_number) / "documents"
+    case_dir = config.find_case_dir(case_number) / "documents" / "originals"
    case_dir.mkdir(parents=True, exist_ok=True)
    dest = case_dir / new_filename

@@ -1216,6 +1221,29 @@ async def _process_tagged_document(task_id: str, dest: Path, case_number: str, c
        _progress[task_id] = {"status": "failed", "error": str(e), "filename": display_name}


+# Strong references to in-flight reprocessing tasks. The event loop keeps only
+# weak references to tasks, so a task nobody else references can be
+# garbage-collected before it finishes.
+_reprocess_tasks = set()
+
+
+@app.post("/api/cases/{case_number}/documents/{doc_id}/reprocess")
+async def api_reprocess_document(case_number: str, doc_id: str):
+    """Reset a document's extraction state and reprocess it in the background.
+
+    The document's current status is not inspected: any document that belongs
+    to the case can be reprocessed, not only failed ones.
+
+    Args:
+        case_number: Case number used to look up the owning case.
+        doc_id: Document identifier; must parse as a UUID.
+
+    Returns:
+        ``{"status": "reprocessing"}`` once the background task is scheduled.
+
+    Raises:
+        HTTPException: 404 if the case does not exist, if ``doc_id`` is not a
+            valid UUID, or if the document does not belong to the case.
+    """
+    case = await db.get_case_by_number(case_number)
+    if not case:
+        raise HTTPException(404, f"תיק {case_number} לא נמצא")
+
+    case_id = UUID(case["id"])
+    try:
+        document_id = UUID(doc_id)
+    except ValueError:
+        # A malformed id cannot match any document; answer 404 rather than
+        # letting the ValueError escape as a 500.
+        raise HTTPException(404, "מסמך לא נמצא בתיק") from None
+
+    doc = await db.get_document(document_id)
+    if not doc or UUID(doc["case_id"]) != case_id:
+        raise HTTPException(404, "מסמך לא נמצא בתיק")
+
+    # Reset status and clean old chunks
+    await db.update_document(document_id, extraction_status="pending")
+    await db.delete_document_chunks(document_id)
+
+    # Process in background; hold a reference until the task completes.
+    task = asyncio.create_task(processor.process_document(document_id, case_id))
+    _reprocess_tasks.add(task)
+    task.add_done_callback(_reprocess_tasks.discard)
+
+    return {"status": "reprocessing"}
+
+
 # ── Background Processing ─────────────────────────────────────────


@@ -1245,7 +1273,7 @@ async def _process_case_document(task_id: str, source: Path, req: ClassifyReques

    # Copy to case directory
    _progress[task_id] = {"status": "copying", "filename": req.filename}
-    case_dir = config.find_case_dir(req.case_number) / "documents"
+    case_dir = config.find_case_dir(req.case_number) / "documents" / "originals"
    case_dir.mkdir(parents=True, exist_ok=True)
    # Use original name without timestamp prefix
    original_name = re.sub(r"^\d+_", "", source.name)