feat: external precedent library with auto halacha extraction
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m27s

Adds a third corpus of legal authority distinct from style_corpus
(Daphna's prior decisions for voice) and case_precedents (chair-attached
quotes per case). The new corpus holds chair-uploaded court rulings and
other appeals committee decisions, with binding rules (הלכות) extracted
automatically and queued for chair approval.

Pipeline (web/app.py + services/precedent_library.py):
file → extract → chunk → Voyage embed → halacha_extractor → store +
publish progress over the existing Redis SSE channel.

Schema V7 (services/db.py): extends case_law with source_kind +
extraction status fields under a CHECK constraint pinning practice_area
to the three appeals committee domains (rishuy_uvniya, betterment_levy,
compensation_197). New precedent_chunks (vector(1024)) and halachot
tables (vector(1024) over rule_statement, IVFFlat indexes, gin on
practice_areas/subject_tags). Halachot start as pending_review; only
approved/published rows are visible to search_precedent_library.

Agents: legal-writer, legal-researcher, legal-analyst, legal-ceo,
legal-qa get search_precedent_library. legal-writer prompt explains
the three-corpus distinction and CREAC use; legal-qa now verifies that
every cited halacha resolves to an approved row in the corpus.

UI: /precedents page with four tabs — library / semantic search /
pending review (J/K nav, A/R/E shortcuts, badge count) / stats.
Reuses the existing upload-sheet progress + SSE pattern.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 08:38:18 +00:00
parent a6edb75bbf
commit 7ee90dce31
23 changed files with 3853 additions and 67 deletions

View File

@@ -3514,3 +3514,314 @@ async def _process_training_document(task_id: str, source: Path, req: ClassifyRe
"chunks": chunk_count,
},
})
# ── External Precedent Library ────────────────────────────────────
# Chair-uploaded court rulings + appeals committee decisions, with
# automatic halacha extraction. Distinct from /api/training (style
# corpus) and /api/cases/{n}/precedents (chair-attached quotes).
from legal_mcp.services import precedent_library as plib_service # noqa: E402
# practice_area values accepted by the precedent-library endpoints. The empty
# string is a member so that an omitted form field (default "") passes the
# membership check.
_PRACTICE_AREAS = {"", "rishuy_uvniya", "betterment_levy", "compensation_197"}
# source_type values accepted on upload; "" again means "not specified".
_SOURCE_TYPES = {"", "court_ruling", "appeals_committee"}
def _make_progress_publisher(task_id: str, filename: str):
    """Return an async ``publish(status, percent, message)`` callback.

    Each call stores one progress snapshot for ``task_id`` through
    ``_progress.set``. Only "completed" and "failed" are forwarded as the
    top-level ``status``; any other value is reported as "processing", while
    the raw value is always kept under ``stage``.
    """
    terminal_states = ("completed", "failed")

    async def publish(status: str, percent: int, message: str) -> None:
        if status in terminal_states:
            overall = status
        else:
            overall = "processing"
        snapshot = {
            "status": overall,
            "stage": status,
            "filename": filename,
            "step": message,
            "percent": percent,
        }
        await _progress.set(task_id, snapshot)

    return publish
class PrecedentUpdateRequest(BaseModel):
    """JSON body accepted by ``PATCH /api/precedent-library/{case_law_id}``.

    Every field is optional and defaults to ``None`` so a client can send
    only the attributes it wants to change.
    """

    # Identification of the ruling.
    case_name: str | None = None
    court: str | None = None
    decision_date: str | None = None

    # Classification.
    practice_area: str | None = None
    appeal_subtype: str | None = None
    subject_tags: list[str] | None = None

    # Editorial text.
    summary: str | None = None
    headnote: str | None = None
    key_quote: str | None = None

    # Provenance and authority.
    source_url: str | None = None
    source_type: str | None = None
    precedent_level: str | None = None
    is_binding: bool | None = None
class HalachaUpdateRequest(BaseModel):
    """JSON body accepted by ``PATCH /api/halachot/{halacha_id}``.

    All fields are optional. ``reviewer`` is the only one with a non-``None``
    default.
    """

    # Review decision and who made it.
    review_status: str | None = None
    reviewer: str | None = "דפנה"

    # Editable content of the extracted rule.
    rule_statement: str | None = None
    reasoning_summary: str | None = None

    # Classification.
    subject_tags: list[str] | None = None
    practice_areas: list[str] | None = None
# Strong references to in-flight ingestion tasks. The event loop itself only
# holds weak references, so a task nobody else references may be
# garbage-collected before it finishes.
_PLIB_UPLOAD_TASKS: set[asyncio.Task] = set()


@app.post("/api/precedent-library/upload")
async def precedent_library_upload(
    file: UploadFile = File(...),
    citation: str = Form(...),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    source_type: str = Form(""),
    precedent_level: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),  # JSON array string
    is_binding: bool = Form(True),
    headnote: str = Form(""),
    summary: str = Form(""),
):
    """Upload a court ruling / appeals committee decision to the
    authoritative precedent library. Halachot are extracted in the
    background and queued for chair approval.

    The upload is validated, streamed to a staging file under ``UPLOAD_DIR``
    and handed to ``plib_service.ingest_precedent`` in a background task.

    Returns:
        ``{"task_id": ...}`` identifying the progress entry for this upload.

    Raises:
        HTTPException: 400 for an invalid ``practice_area`` / ``source_type``,
            a blank ``citation`` or an unsupported file extension; 413 when
            the file exceeds ``MAX_FILE_SIZE``.
    """
    if practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if source_type not in _SOURCE_TYPES:
        raise HTTPException(400, "source_type לא תקין")
    if not citation.strip():
        raise HTTPException(400, "citation חובה")

    display_name = file.filename or ""
    # The filename is client-controlled: keep only its last path component
    # (treating both "/" and "\\" as separators) so the staged file cannot
    # escape UPLOAD_DIR.
    safe_name = Path(display_name.replace("\\", "/")).name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"plib_{uuid4().hex[:8]}_{safe_name}"
    size = 0
    try:
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)
    except BaseException:
        # Size limit, I/O error or cancellation: never leave a partial file
        # behind. The exception is always re-raised.
        staged.unlink(missing_ok=True)
        raise

    # A malformed or non-list subject_tags value is treated as "no tags".
    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    task_id = str(uuid4())
    await _progress.set(task_id, {
        "status": "queued", "filename": display_name,
        "stage": "queued", "percent": 0,
    })
    publish = _make_progress_publisher(task_id, display_name)

    async def _run():
        try:
            await plib_service.ingest_precedent(
                file_path=staged,
                citation=citation.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                source_type=source_type,
                precedent_level=precedent_level,
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                headnote=headnote.strip(),
                summary=summary.strip(),
                progress=publish,
            )
        except Exception as e:
            logger.exception("precedent-library upload failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e),
                "filename": display_name,
            })
        finally:
            # The staged copy is only needed while ingestion runs.
            staged.unlink(missing_ok=True)

    task = asyncio.create_task(_run())
    _PLIB_UPLOAD_TASKS.add(task)
    task.add_done_callback(_PLIB_UPLOAD_TASKS.discard)
    return {"task_id": task_id}
@app.get("/api/precedent-library")
async def precedent_library_list(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
    offset: int = 0,
):
    """List precedent-library entries matching the given filters.

    All query parameters are forwarded unchanged to
    ``db.list_external_case_law``.

    Returns:
        ``{"items": rows, "count": len(rows)}`` where ``count`` is the number
        of rows in this response.
    """
    query = {
        "practice_area": practice_area,
        "court": court,
        "precedent_level": precedent_level,
        "source_type": source_type,
        "search": search,
        "limit": limit,
        "offset": offset,
    }
    items = await db.list_external_case_law(**query)
    return {"items": items, "count": len(items)}
@app.get("/api/precedent-library/stats")
async def precedent_library_stats():
    """Return the result of ``db.precedent_library_stats()`` unchanged."""
    stats = await db.precedent_library_stats()
    return stats
@app.get("/api/precedent-library/search")
async def precedent_library_search(
    q: str,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
):
    """Search the precedent library via ``plib_service.search_library``.

    A query that is empty, or shorter than two characters once stripped,
    returns an empty result without calling the service.

    Returns:
        ``{"items": results, "count": len(results)}``.
    """
    needle = q.strip() if q else ""
    if len(needle) < 2:
        return {"items": [], "count": 0}

    hits = await plib_service.search_library(
        query=needle,
        practice_area=practice_area,
        court=court,
        precedent_level=precedent_level,
        appeal_subtype=appeal_subtype,
        subject_tag=subject_tag,
        limit=limit,
        include_halachot=include_halachot,
    )
    return {"items": hits, "count": len(hits)}
@app.get("/api/precedent-library/{case_law_id}")
async def precedent_library_get(case_law_id: str):
    """Fetch one precedent by id through ``plib_service.get_precedent``.

    Raises:
        HTTPException: 400 when ``case_law_id`` is not a valid UUID, 404 when
            the service returns nothing for it.
    """
    try:
        precedent_id = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    found = await plib_service.get_precedent(precedent_id)
    if found:
        return found
    raise HTTPException(404, "פסיקה לא נמצאה")
@app.patch("/api/precedent-library/{case_law_id}")
async def precedent_library_update(case_law_id: str, req: PrecedentUpdateRequest):
    """Apply a partial update to one precedent.

    Only fields the client actually sent with a non-``None`` value are
    forwarded to ``db.update_case_law``. ``decision_date`` is parsed as an
    ISO date (first 10 characters) and passed on under the key ``date``.

    Returns:
        The record returned by ``db.update_case_law``.

    Raises:
        HTTPException: 400 for an invalid ``case_law_id``, ``practice_area``,
            ``source_type`` or ``decision_date``; 404 when no record is
            returned for the id.
    """
    from datetime import date as date_type

    try:
        cid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    fields = {k: v for k, v in req.model_dump(exclude_unset=True).items() if v is not None}
    if "practice_area" in fields and fields["practice_area"] not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    # Mirror the upload endpoint, which rejects unknown source types.
    if "source_type" in fields and fields["source_type"] not in _SOURCE_TYPES:
        raise HTTPException(400, "source_type לא תקין")

    # Always remove the request-level key: the update call receives the value
    # under "date". An empty string carries no date and is dropped instead of
    # being forwarded under the un-renamed key.
    raw_date = fields.pop("decision_date", None)
    if raw_date:
        try:
            fields["date"] = date_type.fromisoformat(raw_date[:10])
        except ValueError:
            raise HTTPException(400, "decision_date לא תקין")

    record = await db.update_case_law(cid, **fields)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")
    return record
@app.delete("/api/precedent-library/{case_law_id}")
async def precedent_library_delete(case_law_id: str):
    """Delete one precedent through ``plib_service.delete_precedent``.

    Returns:
        ``{"deleted": True, "case_law_id": case_law_id}`` on success.

    Raises:
        HTTPException: 400 when ``case_law_id`` is not a valid UUID, 404 when
            the service reports a falsy result.
    """
    try:
        target = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    removed = await plib_service.delete_precedent(target)
    if removed:
        return {"deleted": True, "case_law_id": case_law_id}
    raise HTTPException(404, "פסיקה לא נמצאה")
# Strong references to in-flight re-extraction tasks. The event loop only
# holds weak references, so an otherwise unreferenced task may be
# garbage-collected before it finishes.
_PLIB_REEXTRACT_TASKS: set[asyncio.Task] = set()


@app.post("/api/precedent-library/{case_law_id}/extract-halachot")
async def precedent_library_reextract(case_law_id: str):
    """Re-run halacha extraction in background. Returns a task_id for SSE.

    Returns:
        ``{"task_id": ...}`` identifying the progress entry for this run.

    Raises:
        HTTPException: 400 when ``case_law_id`` is not a valid UUID, 404 when
            ``db.get_case_law`` finds no record.
    """
    try:
        cid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")
    record = await db.get_case_law(cid)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")

    task_id = str(uuid4())
    # Shown in the progress entry in place of a filename.
    label = record.get("case_number") or case_law_id
    await _progress.set(task_id, {
        "status": "queued", "filename": label, "stage": "queued", "percent": 0,
    })
    publish = _make_progress_publisher(task_id, label)

    async def _run():
        try:
            await plib_service.reextract_halachot(cid, progress=publish)
        except Exception as e:
            logger.exception("re-extract halachot failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e), "filename": label,
            })

    task = asyncio.create_task(_run())
    _PLIB_REEXTRACT_TASKS.add(task)
    task.add_done_callback(_PLIB_REEXTRACT_TASKS.discard)
    return {"task_id": task_id}
@app.get("/api/halachot")
async def halachot_list(
    case_law_id: str = "",
    review_status: str = "",
    practice_area: str = "",
    limit: int = 200,
    offset: int = 0,
):
    """List halachot via ``db.list_halachot``.

    Empty-string filters are passed to the database layer as ``None``.

    Returns:
        ``{"items": rows, "count": len(rows)}``.

    Raises:
        HTTPException: 400 when ``case_law_id`` is given but is not a valid
            UUID.
    """
    parent_id: UUID | None = None
    if case_law_id:
        try:
            parent_id = UUID(case_law_id)
        except ValueError:
            raise HTTPException(400, "case_law_id לא תקין")

    status_filter = review_status or None
    area_filter = practice_area or None
    items = await db.list_halachot(
        case_law_id=parent_id,
        review_status=status_filter,
        practice_area=area_filter,
        limit=limit,
        offset=offset,
    )
    return {"items": items, "count": len(items)}
@app.patch("/api/halachot/{halacha_id}")
async def halacha_update(halacha_id: str, req: HalachaUpdateRequest):
    """Approve / reject / edit a halacha. Used by the chair review queue.

    Returns:
        The row returned by ``db.update_halacha``.

    Raises:
        HTTPException: 400 when ``halacha_id`` is not a valid UUID or
            ``review_status`` is not one of the known states; 404 when the
            update returns no row.
    """
    try:
        target = UUID(halacha_id)
    except ValueError:
        raise HTTPException(400, "halacha_id לא תקין")

    known_states = ("pending_review", "approved", "rejected", "published")
    requested_state = req.review_status
    # An omitted or empty status means "leave unchanged" and skips validation.
    if requested_state and requested_state not in known_states:
        raise HTTPException(400, "review_status לא תקין")

    updated = await db.update_halacha(
        halacha_id=target,
        review_status=requested_state,
        reviewer=req.reviewer or "",
        rule_statement=req.rule_statement,
        reasoning_summary=req.reasoning_summary,
        subject_tags=req.subject_tags,
        practice_areas=req.practice_areas,
    )
    if updated:
        return updated
    raise HTTPException(404, "הלכה לא נמצאה")