feat: external precedent library with auto halacha extraction
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m27s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m27s
Adds a third corpus of legal authority distinct from style_corpus (Daphna's prior decisions for voice) and case_precedents (chair-attached quotes per case). The new corpus holds chair-uploaded court rulings and other appeals committee decisions, with binding rules (הלכות) extracted automatically and queued for chair approval. Pipeline (web/app.py + services/precedent_library.py): file → extract → chunk → Voyage embed → halacha_extractor → store + publish progress over the existing Redis SSE channel. Schema V7 (services/db.py): extends case_law with source_kind + extraction status fields under a CHECK constraint pinning practice_area to the three appeals committee domains (rishuy_uvniya, betterment_levy, compensation_197). New precedent_chunks (vector(1024)) and halachot tables (vector(1024) over rule_statement, IVFFlat indexes, gin on practice_areas/subject_tags). Halachot start as pending_review; only approved/published rows are visible to search_precedent_library. Agents: legal-writer, legal-researcher, legal-analyst, legal-ceo, legal-qa get search_precedent_library. legal-writer prompt explains the three-corpus distinction and CREAC use; legal-qa now verifies that every cited halacha resolves to an approved row in the corpus. UI: /precedents page with four tabs — library / semantic search / pending review (J/K nav, A/R/E shortcuts, badge count) / stats. Reuses the existing upload-sheet progress + SSE pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
311
web/app.py
311
web/app.py
@@ -3514,3 +3514,314 @@ async def _process_training_document(task_id: str, source: Path, req: ClassifyRe
|
||||
"chunks": chunk_count,
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
# ── External Precedent Library ────────────────────────────────────
# Chair-uploaded court rulings + appeals committee decisions, with
# automatic halacha extraction. Distinct from /api/training (style
# corpus) and /api/cases/{n}/precedents (chair-attached quotes).
|
||||
|
||||
from legal_mcp.services import precedent_library as plib_service # noqa: E402
|
||||
|
||||
|
||||
# Accepted practice_area values for the precedent-library endpoints. The
# empty string is included because it is the upload form's default value.
_PRACTICE_AREAS = {"", "rishuy_uvniya", "betterment_levy", "compensation_197"}
|
||||
# Accepted source_type values for precedent-library uploads. The empty
# string is included because it is the upload form's default value.
_SOURCE_TYPES = {"", "court_ruling", "appeals_committee"}
|
||||
|
||||
|
||||
def _make_progress_publisher(task_id: str, filename: str):
    """Build an async callback that pipes ingestion progress to Redis.

    Args:
        task_id: Key under which each progress snapshot is stored through
            ``_progress.set``.
        filename: Label echoed back in every snapshot.

    Returns:
        A coroutine function ``publish(status, percent, message)`` that
        stores one progress snapshot per call.
    """

    async def publish(status: str, percent: int, message: str) -> None:
        # Only the terminal states are reported verbatim under "status";
        # every intermediate stage is collapsed to "processing", while the
        # raw stage name stays available under "stage".
        is_terminal = status in ("completed", "failed")
        await _progress.set(task_id, {
            "status": status if is_terminal else "processing",
            "stage": status,
            "filename": filename,
            "step": message,
            "percent": percent,
        })

    return publish
|
||||
|
||||
|
||||
class PrecedentUpdateRequest(BaseModel):
    """Body of ``PATCH /api/precedent-library/{case_law_id}``.

    Every field is optional and defaults to ``None``; the handler only
    forwards fields that were explicitly set to a non-``None`` value.
    """

    case_name: str | None = None
    court: str | None = None
    # ISO date string; the PATCH handler parses its first 10 characters.
    decision_date: str | None = None
    practice_area: str | None = None
    appeal_subtype: str | None = None
    subject_tags: list[str] | None = None
    summary: str | None = None
    headnote: str | None = None
    key_quote: str | None = None
    source_url: str | None = None
    source_type: str | None = None
    precedent_level: str | None = None
    is_binding: bool | None = None
|
||||
|
||||
|
||||
class HalachaUpdateRequest(BaseModel):
    """Body of ``PATCH /api/halachot/{halacha_id}`` (chair review queue).

    All fields are optional; ``reviewer`` is the only one with a
    non-``None`` default.
    """

    review_status: str | None = None
    reviewer: str | None = "דפנה"
    rule_statement: str | None = None
    reasoning_summary: str | None = None
    subject_tags: list[str] | None = None
    practice_areas: list[str] | None = None
|
||||
|
||||
|
||||
# Strong references to in-flight ingestion tasks. The event loop itself only
# keeps weak references to tasks, so a task whose result is discarded may be
# garbage-collected before it finishes.
_PLIB_UPLOAD_TASKS: set[asyncio.Task] = set()


@app.post("/api/precedent-library/upload")
async def precedent_library_upload(
    file: UploadFile = File(...),
    citation: str = Form(...),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    source_type: str = Form(""),
    precedent_level: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),  # JSON array string
    is_binding: bool = Form(True),
    headnote: str = Form(""),
    summary: str = Form(""),
):
    """Upload a court ruling / appeals committee decision to the
    authoritative precedent library. Halachot are extracted in the
    background and queued for chair approval.

    The file is staged under ``UPLOAD_DIR``, a progress entry is created,
    and ingestion runs as a background task; the staged file is removed
    once that task finishes, whether it succeeded or failed.

    Returns:
        ``{"task_id": ...}`` identifying the progress entry for this upload.

    Raises:
        HTTPException: 400 for an invalid ``practice_area`` / ``source_type``,
            a blank ``citation`` or an unsupported file extension; 413 when
            the upload exceeds ``MAX_FILE_SIZE``.
    """
    if practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if source_type not in _SOURCE_TYPES:
        raise HTTPException(400, "source_type לא תקין")
    if not citation.strip():
        raise HTTPException(400, "citation חובה")

    original_name = file.filename or ""
    # The filename is client-controlled: keep only its last path component
    # (treating backslashes as separators too) so the staged file can never
    # land outside UPLOAD_DIR.
    safe_name = Path(original_name.replace("\\", "/")).name

    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"plib_{uuid4().hex[:8]}_{safe_name}"
    size = 0
    try:
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)
    except BaseException:
        # Never leave a partial upload behind, whatever interrupted the
        # copy (size limit, I/O error, client disconnect / cancellation).
        staged.unlink(missing_ok=True)
        raise

    # Malformed or non-list subject_tags are deliberately treated as "no tags"
    # rather than rejected.
    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    task_id = str(uuid4())
    await _progress.set(task_id, {
        "status": "queued", "filename": original_name,
        "stage": "queued", "percent": 0,
    })

    publish = _make_progress_publisher(task_id, original_name)

    async def _run():
        try:
            await plib_service.ingest_precedent(
                file_path=staged,
                citation=citation.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                source_type=source_type,
                precedent_level=precedent_level,
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                headnote=headnote.strip(),
                summary=summary.strip(),
                progress=publish,
            )
        except Exception as e:
            # Top-level boundary of the background task: log the failure and
            # surface it through the progress entry instead of losing it.
            logger.exception("precedent-library upload failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e),
                "filename": original_name,
            })
        finally:
            staged.unlink(missing_ok=True)

    task = asyncio.create_task(_run())
    _PLIB_UPLOAD_TASKS.add(task)
    task.add_done_callback(_PLIB_UPLOAD_TASKS.discard)
    return {"task_id": task_id}
|
||||
|
||||
|
||||
@app.get("/api/precedent-library")
async def precedent_library_list(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
    offset: int = 0,
):
    """List precedent-library entries, forwarding all filters to the DB layer.

    Returns:
        ``{"items": rows, "count": len(rows)}``. ``count`` is the number of
        rows in this response, not a total across pages.
    """
    rows = await db.list_external_case_law(
        practice_area=practice_area,
        court=court,
        precedent_level=precedent_level,
        source_type=source_type,
        search=search,
        limit=limit,
        offset=offset,
    )
    return {"items": rows, "count": len(rows)}
|
||||
|
||||
|
||||
# NOTE: registered ahead of the "/api/precedent-library/{case_law_id}" route;
# FastAPI matches routes in declaration order, so "stats" is not treated as
# an id.
@app.get("/api/precedent-library/stats")
async def precedent_library_stats():
    """Return whatever ``db.precedent_library_stats()`` reports, unchanged."""
    return await db.precedent_library_stats()
|
||||
|
||||
|
||||
@app.get("/api/precedent-library/search")
async def precedent_library_search(
    q: str,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
):
    """Search the precedent library through ``plib_service.search_library``.

    Queries shorter than two characters (after stripping whitespace) are
    answered with an empty result without calling the search service.

    Returns:
        ``{"items": results, "count": len(results)}``.
    """
    query = q.strip() if q else ""
    if len(query) < 2:
        return {"items": [], "count": 0}

    results = await plib_service.search_library(
        query=query,
        practice_area=practice_area,
        court=court,
        precedent_level=precedent_level,
        appeal_subtype=appeal_subtype,
        subject_tag=subject_tag,
        limit=limit,
        include_halachot=include_halachot,
    )
    return {"items": results, "count": len(results)}
|
||||
|
||||
|
||||
@app.get("/api/precedent-library/{case_law_id}")
async def precedent_library_get(case_law_id: str):
    """Fetch a single precedent-library record by id.

    Raises:
        HTTPException: 400 when ``case_law_id`` is not a valid UUID, 404 when
            ``plib_service.get_precedent`` returns nothing for it.
    """
    try:
        cid = UUID(case_law_id)
    except ValueError:
        # The parsing error adds nothing for the client; drop the chain.
        raise HTTPException(400, "case_law_id לא תקין") from None

    record = await plib_service.get_precedent(cid)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")
    return record
|
||||
|
||||
|
||||
@app.patch("/api/precedent-library/{case_law_id}")
async def precedent_library_update(case_law_id: str, req: PrecedentUpdateRequest):
    """Update metadata fields of a precedent-library record.

    Only fields that were explicitly sent with a non-``None`` value are
    forwarded to ``db.update_case_law``. A non-empty ``decision_date`` is
    parsed from its first 10 characters as an ISO date and forwarded under
    the key ``date``.

    Raises:
        HTTPException: 400 for an invalid id, ``practice_area``,
            ``source_type`` or ``decision_date``; 404 when the update
            returns no record.
    """
    from datetime import date as date_type

    try:
        cid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין") from None

    fields = {
        key: value
        for key, value in req.model_dump(exclude_unset=True).items()
        if value is not None
    }

    if "practice_area" in fields and fields["practice_area"] not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    # Same whitelist the upload endpoint enforces for source_type.
    if "source_type" in fields and fields["source_type"] not in _SOURCE_TYPES:
        raise HTTPException(400, "source_type לא תקין")

    # NOTE(review): an empty-string decision_date skips this branch and is
    # forwarded under its original key "decision_date" — confirm that
    # db.update_case_law accepts that.
    if fields.get("decision_date"):
        raw_date = fields.pop("decision_date")
        try:
            fields["date"] = date_type.fromisoformat(raw_date[:10])
        except ValueError:
            raise HTTPException(400, "decision_date לא תקין") from None

    record = await db.update_case_law(cid, **fields)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")
    return record
|
||||
|
||||
|
||||
@app.delete("/api/precedent-library/{case_law_id}")
async def precedent_library_delete(case_law_id: str):
    """Delete a precedent-library record through ``plib_service.delete_precedent``.

    Returns:
        ``{"deleted": True, "case_law_id": <id as given>}`` on success.

    Raises:
        HTTPException: 400 when ``case_law_id`` is not a valid UUID, 404 when
            the service reports that nothing was deleted.
    """
    try:
        cid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין") from None

    deleted = await plib_service.delete_precedent(cid)
    if not deleted:
        raise HTTPException(404, "פסיקה לא נמצאה")
    return {"deleted": True, "case_law_id": case_law_id}
|
||||
|
||||
|
||||
# Strong references to in-flight re-extraction tasks. The event loop itself
# only keeps weak references to tasks, so a task whose result is discarded may
# be garbage-collected before it finishes.
_PLIB_REEXTRACT_TASKS: set[asyncio.Task] = set()


@app.post("/api/precedent-library/{case_law_id}/extract-halachot")
async def precedent_library_reextract(case_law_id: str):
    """Re-run halacha extraction in background. Returns a task_id for SSE.

    Raises:
        HTTPException: 400 when ``case_law_id`` is not a valid UUID, 404 when
            ``db.get_case_law`` finds no such record.
    """
    try:
        cid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין") from None

    record = await db.get_case_law(cid)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")

    task_id = str(uuid4())
    # Progress entries carry a "filename" label; use the record's case number
    # when it has one, otherwise fall back to the raw id.
    label = record.get("case_number") or case_law_id
    await _progress.set(task_id, {
        "status": "queued", "filename": label, "stage": "queued", "percent": 0,
    })
    publish = _make_progress_publisher(task_id, label)

    async def _run():
        try:
            await plib_service.reextract_halachot(cid, progress=publish)
        except Exception as e:
            # Top-level boundary of the background task: log the failure and
            # surface it through the progress entry instead of losing it.
            logger.exception("re-extract halachot failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e), "filename": label,
            })

    task = asyncio.create_task(_run())
    _PLIB_REEXTRACT_TASKS.add(task)
    task.add_done_callback(_PLIB_REEXTRACT_TASKS.discard)
    return {"task_id": task_id}
|
||||
|
||||
|
||||
@app.get("/api/halachot")
async def halachot_list(
    case_law_id: str = "",
    review_status: str = "",
    practice_area: str = "",
    limit: int = 200,
    offset: int = 0,
):
    """List halachot, optionally filtered by case, review status or practice area.

    Empty-string filters are passed to ``db.list_halachot`` as ``None``.

    Returns:
        ``{"items": rows, "count": len(rows)}``. ``count`` is the number of
        rows in this response, not a total across pages.

    Raises:
        HTTPException: 400 when a non-empty ``case_law_id`` is not a valid UUID.
    """
    cid: UUID | None = None
    if case_law_id:
        try:
            cid = UUID(case_law_id)
        except ValueError:
            raise HTTPException(400, "case_law_id לא תקין") from None

    rows = await db.list_halachot(
        case_law_id=cid,
        review_status=review_status or None,
        practice_area=practice_area or None,
        limit=limit,
        offset=offset,
    )
    return {"items": rows, "count": len(rows)}
|
||||
|
||||
|
||||
@app.patch("/api/halachot/{halacha_id}")
async def halacha_update(halacha_id: str, req: HalachaUpdateRequest):
    """Approve / reject / edit a halacha. Used by the chair review queue.

    Raises:
        HTTPException: 400 when ``halacha_id`` is not a valid UUID or
            ``review_status`` is not one of the accepted values; 404 when
            ``db.update_halacha`` returns no row.
    """
    try:
        hid = UUID(halacha_id)
    except ValueError:
        raise HTTPException(400, "halacha_id לא תקין") from None

    # A missing or empty review_status means "leave the status alone" and
    # therefore skips validation.
    allowed_statuses = {"pending_review", "approved", "rejected", "published"}
    if req.review_status and req.review_status not in allowed_statuses:
        raise HTTPException(400, "review_status לא תקין")

    row = await db.update_halacha(
        halacha_id=hid,
        review_status=req.review_status,
        reviewer=req.reviewer or "",
        rule_statement=req.rule_statement,
        reasoning_summary=req.reasoning_summary,
        subject_tags=req.subject_tags,
        practice_areas=req.practice_areas,
    )
    if not row:
        raise HTTPException(404, "הלכה לא נמצאה")
    return row
|
||||
|
||||
Reference in New Issue
Block a user