Flatten cases directory structure and unify paths

- Remove cases/new|in-progress|completed subdivision (status managed in DB)
- Rename documents/original → documents/originals (consistent plural)
- Move exports from global data/exports/ into cases/{num}/exports/
- Add documents/research/ for case law and analysis files
- Update all agents, scripts, config, web API endpoints, and DB paths

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-09 14:33:27 +00:00
parent 4d674bf475
commit 22e819363e
17 changed files with 1203 additions and 62 deletions

View File

@@ -207,6 +207,10 @@ async def list_cases(detail: bool = False):
doc_count = await conn.fetchval(
"SELECT count(*) FROM documents WHERE case_id = $1", case_id
)
processing_count = await conn.fetchval(
"SELECT count(*) FROM documents WHERE case_id = $1 AND extraction_status != 'completed'",
case_id,
)
result.append({
"case_number": c["case_number"],
"title": c["title"],
@@ -215,6 +219,7 @@ async def list_cases(detail: bool = False):
"committee_type": c.get("committee_type", ""),
"hearing_date": str(c["hearing_date"]) if c.get("hearing_date") else "",
"document_count": doc_count,
"processing_count": processing_count,
"gitea_url": f"https://gitea.nautilus.marcusgroup.org/cases/{c['case_number']}",
})
return result
@@ -566,7 +571,7 @@ async def api_learn(case_number: str):
@app.get("/api/cases/{case_number}/exports")
async def api_list_exports(case_number: str):
"""List all exported drafts and versions for a case."""
export_dir = config.EXPORTS_DIR / case_number
export_dir = config.find_case_dir(case_number) / "exports"
if not export_dir.exists():
return []
files = []
@@ -585,7 +590,7 @@ async def api_list_exports(case_number: str):
@app.get("/api/cases/{case_number}/exports/{filename}/download")
async def api_download_export(case_number: str, filename: str):
"""Download an exported file."""
export_dir = config.EXPORTS_DIR / case_number
export_dir = config.find_case_dir(case_number) / "exports"
path = export_dir / filename
if not path.exists() or not path.parent.samefile(export_dir):
raise HTTPException(404, "קובץ לא נמצא")
@@ -614,7 +619,7 @@ async def api_upload_export(case_number: str, file: UploadFile = File(...)):
if len(content) > MAX_FILE_SIZE:
raise HTTPException(400, f"קובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")
export_dir = config.EXPORTS_DIR / case_number
export_dir = config.find_case_dir(case_number) / "exports"
export_dir.mkdir(parents=True, exist_ok=True)
# Version numbering for uploads
@@ -644,7 +649,7 @@ async def api_mark_final(case_number: str, filename: str):
if not case:
raise HTTPException(404, f"תיק {case_number} לא נמצא")
export_dir = config.EXPORTS_DIR / case_number
export_dir = config.find_case_dir(case_number) / "exports"
source = export_dir / filename
if not source.exists() or not source.parent.samefile(export_dir):
raise HTTPException(404, "קובץ לא נמצא")
@@ -1142,7 +1147,7 @@ async def api_upload_tagged_document(
new_filename = generate_doc_filename(doc_type, case_number, party_name, ext)
# Save to case directory
case_dir = config.find_case_dir(case_number) / "documents"
case_dir = config.find_case_dir(case_number) / "documents" / "originals"
case_dir.mkdir(parents=True, exist_ok=True)
dest = case_dir / new_filename
@@ -1216,6 +1221,29 @@ async def _process_tagged_document(task_id: str, dest: Path, case_number: str, c
_progress[task_id] = {"status": "failed", "error": str(e), "filename": display_name}
# Strong references to in-flight reprocessing tasks. The event loop keeps only
# weak references to tasks, so an otherwise unreferenced task may be
# garbage-collected before it finishes.
_reprocess_tasks = set()


@app.post("/api/cases/{case_number}/documents/{doc_id}/reprocess")
async def api_reprocess_document(case_number: str, doc_id: str):
    """Reset a document's extraction state and re-run processing in the background.

    Args:
        case_number: Case number used to look up the owning case.
        doc_id: Document identifier taken from the URL; must parse as a UUID.

    Returns:
        ``{"status": "reprocessing"}`` once the background task has been scheduled.

    Raises:
        HTTPException: 404 if the case does not exist, if ``doc_id`` is not a
            valid UUID, or if the document does not belong to the case.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    case_id = UUID(case["id"])
    try:
        document_id = UUID(doc_id)
    except ValueError:
        # A malformed id cannot match any document; answer 404 rather than
        # letting the ValueError surface as a 500.
        raise HTTPException(404, "מסמך לא נמצא בתיק") from None

    doc = await db.get_document(document_id)
    if not doc or UUID(doc["case_id"]) != case_id:
        raise HTTPException(404, "מסמך לא נמצא בתיק")

    # Reset status and drop the old chunks before processing starts again.
    await db.update_document(document_id, extraction_status="pending")
    await db.delete_document_chunks(document_id)

    # Run processing in the background; hold a reference until it completes.
    task = asyncio.create_task(processor.process_document(document_id, case_id))
    _reprocess_tasks.add(task)
    task.add_done_callback(_reprocess_tasks.discard)
    return {"status": "reprocessing"}
# ── Background Processing ─────────────────────────────────────────
@@ -1245,7 +1273,7 @@ async def _process_case_document(task_id: str, source: Path, req: ClassifyReques
# Copy to case directory
_progress[task_id] = {"status": "copying", "filename": req.filename}
case_dir = config.find_case_dir(req.case_number) / "documents"
case_dir = config.find_case_dir(req.case_number) / "documents" / "originals"
case_dir.mkdir(parents=True, exist_ok=True)
# Use original name without timestamp prefix
original_name = re.sub(r"^\d+_", "", source.name)