Files
legal-ai/web/app.py
Chaim ac279220c4 feat(goldset): interactive gold-set tagging page (#81.7/#81.8)
Replaces the CSV-edit workflow with an in-app tagging page so the chair/Dafna
can label the extraction-quality gold-set by clicking, and see validator
precision/recall live.

Schema (V29): halacha_goldset — a stratified, human-tagged evaluation batch
(is_holding / correct_type / quote_complete, NULL until tagged).

db.py:
- goldset_create_sample (stratified round-robin over case×rule_type, idempotent),
- goldset_list (items + halacha content + the machine's own labels),
- goldset_tag (partial — one field at a time for keyboard tagging),
- goldset_score (ports the script's P/R/F1: each validator scored as a
  not-a-holding detector against the human tags — the #81.8 input).

API: GET /api/goldset, POST /api/goldset/sample, GET /api/goldset/score,
PATCH /api/goldset/{id}.

web-ui:
- lib/api/goldset.ts (hooks),
- components/goldset/goldset-panel.tsx — card-per-item, keyboard-first
  (J/K nav, H/N holding, C/X quote), progress bar, hide-tagged toggle, and a
  collapsible live score table,
- app/goldset/page.tsx + nav link "מדגם-זהב" under ידע ולמידה.

Methodology guard kept explicit in UI + docstrings: tags are HUMAN ground truth,
no AI pre-fill (circular bias). Populated a 150-item stratified batch.

Verified: backend create/list/tag/score against the live DB; tsc --noEmit 0;
py_compile ok. (Local Turbopack build blocked by worktree symlink — CI builds clean.)

Invariants: G1 (eval set modeled at source in its own table); G2 (reuses the same
halacha_quality validators the extractor runs — no parallel scoring logic).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 21:52:05 +00:00

6627 lines
247 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Ezer Mishpati — Web upload interface for legal documents."""
from __future__ import annotations
import asyncio
import json
import logging
import os
import re
import shutil
import subprocess
import sys
import time
from contextlib import asynccontextmanager
from datetime import date as date_type, datetime, timezone
from pathlib import Path
from uuid import UUID, uuid4
# Allow importing legal_mcp from the MCP server source
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "src"))
import zipfile
from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from typing import Any, Literal
from pydantic import BaseModel
import asyncpg
import httpx
from legal_mcp import config
from legal_mcp.services import chunker, db, embeddings, extractor, git_sync, metrics as metrics_service, processor, proofreader, research_md
from legal_mcp.tools import cases as cases_tools, search as search_tools, workflow as workflow_tools, drafting as drafting_tools, precedents as precedents_tools
from legal_mcp.tools.envelope import envelope_unwrap
# Import integration clients (same directory)
_web_dir = Path(__file__).resolve().parent
sys.path.insert(0, str(_web_dir.parent))
from web.gitea_client import commit_and_push, create_repo, setup_remote_and_push
from web.mcp_env_catalog import (
ENV_CATALOG,
EnvSpec,
coerce,
mask_secret,
normalize_for_compare,
)
from web.progress_store import ProgressStore
from web.paperclip_api import emit_case_status_webhook, pc_request, require_paperclip_db_url
from web.paperclip_client import (
COMPANIES as PAPERCLIP_COMPANIES,
accept_interaction as pc_accept_interaction,
archive_project as pc_archive_project,
create_project as pc_create_project,
create_workflow_issue as pc_create_workflow_issue,
get_agents_for_case as pc_get_agents_for_case,
get_agents_for_company as pc_get_agents,
get_case_issues as pc_get_case_issues,
get_issue_comments as pc_get_issue_comments,
get_issue_interactions as pc_get_issue_interactions,
get_project_url,
post_comment as pc_post_comment,
reject_interaction as pc_reject_interaction,
respond_to_interaction as pc_respond_to_interaction,
restore_project as pc_restore_project,
wake_analyst_for_appraiser_facts as pc_wake_analyst_for_appraiser_facts,
wake_ceo_agent as pc_wake_ceo,
wake_ceo_for_feedback_fold as pc_wake_ceo_for_feedback_fold,
wake_curator_for_final as pc_wake_curator_for_final,
wake_for_precedent_extraction as pc_wake_for_precedent_extraction,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Staging directory: the upload endpoint writes files here and the
# classify/analyze endpoints read them back by name.
UPLOAD_DIR = config.DATA_DIR / "uploads"
# File extensions accepted for upload (compared against the lower-cased suffix).
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc", ".rtf", ".txt", ".md"}
# Upper bound on a single uploaded file, in bytes.
MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB
# Progress tracking — backed by Redis with TTL.
# Each entry is a JSON-serialized dict keyed by task_id and auto-expires
# after PROGRESS_TTL_SECONDS so terminal states remain observable to late
# SSE subscribers (a 404 on reconnect was the root cause of stuck UI rows).
PROGRESS_TTL_SECONDS = 300
_progress = ProgressStore(config.REDIS_URL, ttl_seconds=PROGRESS_TTL_SECONDS)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run application startup and shutdown around the serving period.

    Startup: ensure the upload directory exists, initialise the database
    schema, and launch the git-sync sweep loop as a background task.
    Shutdown: cancel the sweep task and wait for it to unwind, then close the
    database pool and the progress store.
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    await db.init_schema()
    sweeper = asyncio.create_task(git_sync.sweep_loop())
    try:
        yield
    finally:
        sweeper.cancel()
        try:
            await sweeper
        except asyncio.CancelledError:
            # Expected: the task was cancelled on the line above.
            pass
        await db.close_pool()
        await _progress.close()
# The FastAPI application; `lifespan` provides its startup/shutdown handling.
app = FastAPI(title="העלאת מסמכים משפטיים", lifespan=lifespan)
# ── API Endpoints ──────────────────────────────────────────────────
@app.get("/")
async def index():
    """Return a static liveness payload that also names the frontend URL."""
    payload = {
        "status": "ok",
        "frontend": "https://legal-ai-next.nautilus.marcusgroup.org",
    }
    return payload
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a file to the temporary uploads directory.

    The stored name is ``<unix-timestamp>_<sanitized stem><ext>``.

    Args:
        file: The multipart upload.

    Returns:
        A dict with the stored ``filename``, the client's ``original_name``
        and the ``size`` in bytes.

    Raises:
        HTTPException: 400 when the filename is missing, the extension is not
            in ``ALLOWED_EXTENSIONS``, or the content exceeds ``MAX_FILE_SIZE``.
    """
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    # Validate extension
    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        # Sorted so the message is stable (set iteration order is arbitrary).
        allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
        raise HTTPException(400, f"Unsupported file type: {ext}. Allowed: {allowed}")

    # Sanitize filename: keep word chars, Hebrew letters, whitespace and a few
    # punctuation marks; trim the ends so a whitespace-only stem falls back to
    # the default name instead of producing a blank-looking file.
    safe_name = re.sub(
        r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem
    ).strip()
    if not safe_name:
        safe_name = "document"
    timestamp = int(time.time())
    filename = f"{timestamp}_{safe_name}{ext}"

    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    content = await file.read(MAX_FILE_SIZE + 1)
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"File too large. Max: {MAX_FILE_SIZE // (1024*1024)}MB")

    dest = UPLOAD_DIR / filename
    dest.write_bytes(content)
    return {
        "filename": filename,
        "original_name": file.filename,
        "size": len(content),
    }
@app.get("/api/uploads")
async def list_uploads():
    """List files in the uploads (pending) directory, newest first.

    Returns:
        A list of ``{"filename", "size", "uploaded_at"}`` dicts for regular
        files whose extension is allowed; an empty list when the directory
        does not exist. ``uploaded_at`` is the file's modification time.
    """
    if not UPLOAD_DIR.exists():
        return []
    newest_first = sorted(
        UPLOAD_DIR.iterdir(),
        key=lambda entry: entry.stat().st_mtime,
        reverse=True,
    )
    listing = []
    for entry in newest_first:
        if not entry.is_file():
            continue
        if entry.suffix.lower() not in ALLOWED_EXTENSIONS:
            continue
        info = entry.stat()
        listing.append(
            {
                "filename": entry.name,
                "size": info.st_size,
                "uploaded_at": info.st_mtime,
            }
        )
    return listing
@app.delete("/api/uploads/{filename}")
async def delete_upload(filename: str):
    """Remove a file from the uploads directory.

    Args:
        filename: Name of a file located directly inside ``UPLOAD_DIR``.

    Returns:
        ``{"deleted": filename}`` on success.

    Raises:
        HTTPException: 404 when the name does not refer to a regular file
            directly inside ``UPLOAD_DIR``.
    """
    path = UPLOAD_DIR / filename
    # is_file() rather than exists(): a name such as ".." (or a sub-directory
    # name) points at an existing directory whose parent is UPLOAD_DIR, and
    # unlink() on it would surface as an unhandled error instead of a 404.
    if not path.is_file() or not path.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "File not found")
    path.unlink()
    return {"deleted": filename}
class ClassifyRequest(BaseModel):
    """Request body for ``POST /api/classify``."""
    # Name of the pending file inside UPLOAD_DIR.
    filename: str
    category: str # "training" or "case"
    # For case documents
    case_number: str = ""
    doc_type: str = "appeal"
    title: str = ""
    # For training documents
    decision_number: str = ""
    decision_date: str = ""
    subject_categories: list[str] = []
@app.post("/api/classify")
async def classify_file(req: ClassifyRequest):
    """Classify a pending file and start processing.

    Args:
        req: The pending filename plus its classification metadata.

    Returns:
        ``{"task_id": ...}`` — the key under which progress is published.

    Raises:
        HTTPException: 404 when the file is not a regular file directly inside
            ``UPLOAD_DIR``; 400 for an unknown category or a case document
            without ``case_number``.
    """
    source = UPLOAD_DIR / req.filename
    # is_file() rather than exists(): the name comes from the request body, and
    # ".." or a sub-directory name would otherwise pass this check and hand a
    # directory to the processing task.
    if not source.is_file() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "File not found in uploads")
    if req.category not in ("training", "case"):
        raise HTTPException(400, "Category must be 'training' or 'case'")
    if req.category == "case" and not req.case_number:
        raise HTTPException(400, "case_number required for case documents")

    task_id = str(uuid4())
    await _progress.set(task_id, {"status": "queued", "filename": req.filename})
    # NOTE(review): the task object is not retained. The asyncio docs advise
    # keeping a reference to tasks created with create_task() so they cannot
    # be garbage-collected mid-run — consider a module-level task set.
    asyncio.create_task(_process_file(task_id, source, req))
    return {"task_id": task_id}
# ── Training Corpus: Analyze & Upload ─────────────────────────────
@app.post("/api/training/analyze")
async def training_analyze(filename: str = Form(...)):
    """Proofread an uploaded file and extract metadata for review.

    Input: filename in UPLOAD_DIR (from /api/upload).
    Output: clean text preview + extracted metadata (number, date, categories).

    Raises:
        HTTPException: 404 when the file is not a regular file directly inside
            ``UPLOAD_DIR``; 500 when proofreading raises.
    """
    source = UPLOAD_DIR / filename
    # is_file() rather than exists(): rejects ".." and sub-directory names,
    # which exist and sit directly under UPLOAD_DIR but are not files.
    if not source.is_file() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "File not found in uploads")
    try:
        result = await proofreader.analyze_file(source)
    except Exception as e:
        logger.exception("Proofread failed for %s", filename)
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(500, f"Proofreading failed: {e}") from e
    return result
class TrainingUploadRequest(BaseModel):
    """Request body for ``POST /api/training/upload``."""
    filename: str # name in UPLOAD_DIR
    # Used for the duplicate check against style_corpus when non-empty.
    decision_number: str = ""
    decision_date: str = "" # YYYY-MM-DD
    subject_categories: list[str] = []
    # Optional display title; when empty it is derived from the file name.
    title: str = ""
@app.post("/api/training/upload")
async def training_upload(req: TrainingUploadRequest):
    """Upload a proofread file to the style corpus.

    Runs proofreading again to guarantee clean text (not raw file content),
    then inserts into style_corpus + chunks + embeddings. The work happens in
    a background task; this handler only validates and enqueues.

    Returns:
        ``{"task_id": ...}`` — the key under which progress is published.

    Raises:
        HTTPException: 404 when the file is not a regular file directly inside
            ``UPLOAD_DIR``; 409 when ``decision_number`` already exists in the
            corpus.
    """
    source = UPLOAD_DIR / req.filename
    # is_file() rather than exists(): rejects ".." and sub-directory names,
    # which exist and sit directly under UPLOAD_DIR but are not files.
    if not source.is_file() or not source.parent.samefile(UPLOAD_DIR):
        raise HTTPException(404, "File not found in uploads")

    # Check for duplicate by decision_number
    if req.decision_number:
        pool = await db.get_pool()
        async with pool.acquire() as conn:
            exists = await conn.fetchval(
                "SELECT 1 FROM style_corpus WHERE decision_number = $1 LIMIT 1",
                req.decision_number,
            )
        if exists:
            raise HTTPException(
                409,
                f"החלטה {req.decision_number} כבר קיימת בקורפוס",
            )

    task_id = str(uuid4())
    await _progress.set(task_id, {"status": "queued", "filename": req.filename})
    # NOTE(review): the task object is not retained; see the asyncio docs on
    # keeping a reference to tasks created with create_task().
    asyncio.create_task(_process_proofread_training(task_id, source, req))
    return {"task_id": task_id}
async def _process_proofread_training(
    task_id: str, source: Path, req: TrainingUploadRequest
):
    """Background task: proofread → store in corpus → chunk → embed.

    Progress is published to the progress store under ``task_id`` at each
    step. The final entry is ``completed`` (with a result summary) or
    ``failed`` (with the error text); exceptions are logged, not propagated.

    Args:
        task_id: Key under which progress entries are written.
        source: The uploaded file inside the uploads directory.
        req: Caller-supplied metadata (number, date, categories, title).
    """

    async def report(step: str) -> None:
        """Publish a 'processing' progress entry for the given step."""
        await _progress.set(
            task_id,
            {"status": "processing", "filename": req.filename, "step": step},
        )

    try:
        title = req.title or source.stem.split("_", 1)[-1]

        # Parse the date before touching the filesystem: a malformed date then
        # fails the task without leaving copied files in the training dir.
        d_date = (
            date_type.fromisoformat(req.decision_date)
            if req.decision_date
            else None
        )

        # 1. Proofread (strip Nevo additions)
        await report("proofreading")
        clean_text, stats = await proofreader.proofread(source)

        # 2. Save proofread .md to training dir (alongside original)
        await report("saving")
        training_dir = config.TRAINING_DIR
        proofread_dir = training_dir / "proofread"
        training_dir.mkdir(parents=True, exist_ok=True)
        proofread_dir.mkdir(exist_ok=True)

        # Copy original to training dir, dropping the "<digits>_" upload prefix
        original_name = re.sub(r"^\d+_", "", source.name)
        orig_dest = training_dir / original_name
        shutil.copy2(str(source), str(orig_dest))

        # Save cleaned version
        proofread_name = Path(original_name).stem + ".md"
        proofread_dest = proofread_dir / proofread_name
        proofread_dest.write_text(clean_text, encoding="utf-8")

        # 3. Add to style corpus
        await report("corpus")
        corpus_id = await db.add_to_style_corpus(
            document_id=None,
            decision_number=req.decision_number,
            decision_date=d_date,
            subject_categories=req.subject_categories,
            full_text=clean_text,
        )

        # 4. Chunk + embed
        await report("chunking")
        chunks = chunker.chunk_document(clean_text)
        chunk_count = 0
        if chunks:
            doc = await db.create_document(
                case_id=None,
                doc_type="decision",
                title=f"[קורפוס] {title}",
                file_path=str(orig_dest),
                page_count=stats.get("pages", 0),
            )
            doc_id = UUID(doc["id"])
            await db.update_document(
                doc_id, extracted_text=clean_text, extraction_status="completed"
            )
            await report("embedding")
            texts = [c.content for c in chunks]
            embs = await embeddings.embed_texts(texts, input_type="document")
            chunk_dicts = [
                {
                    "content": c.content,
                    "section_type": c.section_type,
                    "embedding": emb,
                    "page_number": c.page_number,
                    "chunk_index": c.chunk_index,
                }
                for c, emb in zip(chunks, embs)
            ]
            await db.store_chunks(doc_id, None, chunk_dicts)
            chunk_count = len(chunks)

        # 5. Cleanup upload
        source.unlink(missing_ok=True)
        await _progress.set(task_id, {
            "status": "completed",
            "filename": req.filename,
            "result": {
                "corpus_id": str(corpus_id),
                "title": title,
                "chars": len(clean_text),
                "chunks": chunk_count,
                "proofread_stats": stats,
            },
        })
    except Exception as e:
        logger.exception("Training upload failed for %s", req.filename)
        await _progress.set(task_id, {"status": "failed", "error": str(e), "filename": req.filename})
@app.get("/api/training/patterns")
async def training_patterns():
    """List all extracted style patterns, grouped by type.

    Returns:
        ``{"total": <row count>, "by_type": {pattern_type: [pattern, ...]}}``
        where each pattern carries its text, frequency, context and examples.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT pattern_type, pattern_text, frequency, context, examples "
            "FROM style_patterns "
            "ORDER BY pattern_type, frequency DESC"
        )

    by_type: dict[str, list] = {}
    for row in rows:
        # examples may arrive as a JSON string; undecodable text becomes [].
        raw_examples = row["examples"]
        if isinstance(raw_examples, str):
            try:
                raw_examples = json.loads(raw_examples)
            except Exception:
                raw_examples = []
        entry = {
            "pattern_text": row["pattern_text"],
            "frequency": row["frequency"],
            "context": row["context"] or "",
            "examples": raw_examples or [],
        }
        by_type.setdefault(row["pattern_type"], []).append(entry)
    return {"total": len(rows), "by_type": by_type}
# Module-level status record for the style-analysis job: the `running` flag,
# `started_at` (a time.time() value), and the final `result` or `error` text.
_style_analysis_state = {"running": False, "started_at": None, "result": None, "error": None}
@app.post("/api/training/analyze-style")
async def training_analyze_style():
    """Kick off style analysis over the corpus. Returns immediately.

    Raises:
        HTTPException: 409 when an analysis is already marked as running.
    """
    state = _style_analysis_state
    if state["running"]:
        raise HTTPException(409, "ניתוח סגנון כבר רץ")
    state.update(running=True, started_at=time.time(), result=None, error=None)

    async def _worker():
        """Run the analysis and record its result or error in the state."""
        from legal_mcp.services.style_analyzer import analyze_corpus
        try:
            outcome = await analyze_corpus()
            state["result"] = outcome
        except Exception as exc:
            logger.exception("Style analysis failed")
            state["error"] = str(exc)
        finally:
            state["running"] = False

    asyncio.create_task(_worker())
    return {"status": "started"}
@app.get("/api/training/analyze-style/status")
async def training_analyze_style_status():
    """Poll status of the running style analysis.

    Returns a copy of the state record, plus ``elapsed`` (whole seconds since
    ``started_at``) once a run has been started.
    """
    snapshot = {**_style_analysis_state}
    started = snapshot["started_at"]
    if started:
        snapshot["elapsed"] = int(time.time() - started)
    return snapshot
# ── Style Report — visual dashboard data ─────────────────────────
# Hebrew display labels for the section_type values stored on document chunks.
_SECTION_TYPE_HEBREW = {
    "intro": "פתיחה",
    "facts": "רקע",
    "appellant_claims": "טענות העורר",
    "respondent_claims": "טענות המשיב",
    "legal_analysis": "דיון משפטי",
    "ruling": "הכרעה",
    "conclusion": "סוף דבר",
}
# Order in which sections are emitted by the anatomy report; section types
# not listed here are left out of that report's item list.
_SECTION_DISPLAY_ORDER = [
    "intro", "facts", "appellant_claims", "respondent_claims",
    "legal_analysis", "ruling", "conclusion",
]
def _strip_nikud(text: str) -> str:
    """Return *text* NFD-decomposed with every combining mark removed.

    This drops Hebrew vowel points as well as any other combining character
    (for example Latin accents). The result is not recomposed.
    """
    import unicodedata

    decomposed = unicodedata.normalize("NFD", text)
    kept = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return "".join(kept)
def _extract_pattern_variants(pattern_text: str) -> list[str]:
    """Mirror of scripts/backfill_pattern_frequency.py logic for matching.

    A pattern may list alternatives separated by "/" or the Hebrew word "או".
    Within each alternative, bracketed placeholders and ellipses act as
    separators; the longest remaining literal segment (at least 4 characters)
    is kept as the substring to search for.

    Args:
        pattern_text: The stored pattern, possibly a template.

    Returns:
        One search string per usable alternative, de-duplicated, in order.
    """
    variants: list[str] = []
    for alt in re.split(r"\s*/\s*|\s+או\s+", pattern_text):
        alt = alt.strip()
        if not alt:
            continue
        alt = re.sub(r"\[[^\]]*\]", "|", alt)   # [placeholder] -> separator
        alt = re.sub(r"\.{2,}", "|", alt)       # ".." / "..." -> separator
        # The Unicode ellipsis is spelled as an escape on purpose: an empty
        # search string here would insert "|" between every character and
        # leave no usable segment.
        # NOTE(review): assumes the intended character is U+2026 — confirm.
        alt = alt.replace("\u2026", "|")
        segments = [s.strip(" ,.:;\"'") for s in alt.split("|")]
        good = [s for s in segments if len(s) >= 4]
        if good:
            variants.append(max(good, key=len))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(variants))
async def _compute_corpus_stats(conn) -> dict:
    """Hero section: decision count, chars, subject distribution, timeline.

    Args:
        conn: A database connection exposing async ``fetchrow`` and ``fetch``.

    Returns:
        A dict with corpus totals, the date range, one entry per decision, the
        top-6 subject distribution (the remainder collapsed into "אחר") and a
        Hebrew headline. For an empty corpus the totals are 0, not None.
    """
    from collections import Counter

    def subjects_of(raw) -> list:
        """Decode a subject_categories value; undecodable text yields []."""
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except Exception:
                return []
        return raw or []

    stats = await conn.fetchrow(
        "SELECT count(*) as n, "
        " sum(length(full_text)) as total_chars, "
        " avg(length(full_text))::int as avg_chars, "
        " min(decision_date) as min_date, "
        " max(decision_date) as max_date "
        "FROM style_corpus"
    )
    decisions = await conn.fetch(
        "SELECT decision_number, decision_date, length(full_text) as chars, "
        " subject_categories "
        "FROM style_corpus ORDER BY decision_date NULLS LAST"
    )

    # Decode each row's subjects once; reused for both the distribution and
    # the per-decision list so the two can never disagree.
    subjects_per_decision = [subjects_of(d["subject_categories"]) for d in decisions]

    # Subject distribution
    subject_counter: Counter = Counter()
    for cats in subjects_per_decision:
        for c in cats:
            subject_counter[c] += 1

    # Cap at top 6 subjects, collapse rest to "אחר"
    top = subject_counter.most_common(6)
    other_count = sum(subject_counter.values()) - sum(c for _, c in top)
    subject_distribution = [{"label": label, "count": count} for label, count in top]
    if other_count > 0:
        subject_distribution.append({"label": "אחר", "count": other_count})

    n = stats["n"]
    # sum()/avg() are NULL over an empty table; coalesce so the ":," format
    # below does not raise on None.
    total_chars = stats["total_chars"] or 0
    avg_chars = stats["avg_chars"] or 0
    top_subject = top[0] if top else None
    headline = (
        f"קראתי {n} מההחלטות שלך. ממוצע {avg_chars:,} תווים לכל החלטה"
        + (f", הנושא הנפוץ אצלך: {top_subject[0]} ({top_subject[1]} החלטות)" if top_subject else "")
    )
    return {
        "decision_count": n,
        "total_chars": total_chars,
        "avg_chars": avg_chars,
        "date_range": [
            str(stats["min_date"]) if stats["min_date"] else None,
            str(stats["max_date"]) if stats["max_date"] else None,
        ],
        "decisions": [
            {
                "number": d["decision_number"] or "",
                "date": str(d["decision_date"]) if d["decision_date"] else "",
                "chars": d["chars"],
                "subjects": cats,
            }
            for d, cats in zip(decisions, subjects_per_decision)
        ],
        "subject_distribution": subject_distribution,
        "headline": headline,
    }
async def _compute_anatomy(conn) -> dict:
    """Section 2: average section lengths across the training corpus.

    Only chunks of documents whose title starts with "[קורפוס]" are counted.
    For each section type in display order the result carries the average
    characters per document containing it, its share of all section
    characters, and the number of documents that contain it.
    """
    rows = await conn.fetch(
        """
        SELECT dc.section_type,
               sum(length(dc.content))::int as total_chars,
               count(distinct dc.document_id) as docs
        FROM document_chunks dc
        JOIN documents d ON dc.document_id = d.id
        WHERE d.title LIKE '[קורפוס]%'
          AND dc.section_type IS NOT NULL
        GROUP BY dc.section_type
        """
    )
    if not rows:
        return {
            "sections": [],
            "total_coverage": 0,
            "headline": "אין עדיין נתונים על מבנה ההחלטות",
        }

    by_type = {row["section_type"]: row for row in rows}
    # Denominator for the share: characters of every section type returned,
    # including types that are not part of the display order.
    grand_total = sum(row["total_chars"] for row in rows)

    sections = []
    for key in _SECTION_DISPLAY_ORDER:
        if key not in by_type:
            continue
        row = by_type[key]
        doc_count = row["docs"]
        section_chars = row["total_chars"]
        if doc_count:
            avg_chars = round(section_chars / doc_count)
        else:
            avg_chars = 0
        if grand_total:
            share = section_chars / grand_total
        else:
            share = 0
        sections.append({
            "type": key,
            "label": _SECTION_TYPE_HEBREW.get(key, key),
            "avg_chars": avg_chars,
            "pct": round(share, 4),
            "coverage": doc_count,
        })

    # Number of corpus documents that have any chunk at all.
    total_coverage = await conn.fetchval(
        "SELECT count(distinct dc.document_id) "
        "FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
        "WHERE d.title LIKE '[קורפוס]%'"
    )

    # Headline names the section with the largest share.
    headline = ""
    if sections:
        biggest = max(sections, key=lambda item: item["pct"])
        pct_int = round(biggest["pct"] * 100)
        headline = f"{biggest['label']} הוא {pct_int}% מכל החלטה אצלך — זה המוקד שלך"
    return {
        "sections": sections,
        "total_coverage": total_coverage,
        "headline": headline,
    }
async def _compute_signature_phrases(conn) -> dict:
    """Section 3: all patterns with real frequencies, plus headline about top.

    Returns the patterns with ``frequency > 0`` (highest first), the total
    number of corpus decisions, and a headline quoting a cleaned-up version of
    the top pattern.
    """
    rows = await conn.fetch(
        "SELECT pattern_type, pattern_text, context, frequency, examples "
        "FROM style_patterns "
        "WHERE frequency > 0 "
        "ORDER BY frequency DESC"
    )

    items = []
    for row in rows:
        # examples may arrive as a JSON string; undecodable text becomes [].
        raw_examples = row["examples"]
        if isinstance(raw_examples, str):
            try:
                raw_examples = json.loads(raw_examples)
            except Exception:
                raw_examples = []
        items.append({
            "type": row["pattern_type"],
            "text": row["pattern_text"],
            "context": row["context"] or "",
            "frequency": row["frequency"],
            "examples": raw_examples or [],
        })

    # Total decision count for denominator
    total_decisions = await conn.fetchval("SELECT count(*) FROM style_corpus")

    if not items:
        headline = "טרם חולצו דפוסים — הרץ ניתוח קורפוס"
        return {"items": items, "total_decisions": total_decisions, "headline": headline}

    # Among the five most frequent, prefer the first phrase with at most one
    # [placeholder] (heavy templates read badly); otherwise use the top item.
    top = next(
        (
            candidate
            for candidate in items[:5]
            if len(re.findall(r"\[[^\]]*\]", candidate["text"])) <= 1
        ),
        items[0],
    )

    # Clean up for display: drop placeholders, collapse whitespace, keep only
    # the first alternative, and cap the length.
    display = re.sub(r"\[[^\]]*\]", "", top["text"])
    display = re.sub(r"\s+", " ", display).strip(" .,:;\"'")
    display = display.split(" / ")[0].split(" או ")[0].strip(" .,:;\"'")
    if len(display) > 60:
        display = display[:57] + "..."
    frequency = top["frequency"]
    headline = f'הפטרן האהוב עלייך: "{display}" — מופיע ב-{frequency} מתוך {total_decisions} החלטות'
    return {"items": items, "total_decisions": total_decisions, "headline": headline}
async def _compute_contribution(conn) -> dict:
    """Section 4: per-decision contribution + growth curve.

    Decisions are walked in chronological order. A pattern counts as "new" for
    the first decision whose text contains one of its variants, and as
    "confirmed" for every later decision that also contains it.

    Args:
        conn: A database connection exposing async ``fetch``.

    Returns:
        A dict with ``growth_curve`` (cumulative distinct patterns after each
        decision), ``decision_contributions``, ``total_patterns`` (patterns
        matched in at least one decision) and a Hebrew ``headline``.
    """
    decisions = await conn.fetch(
        "SELECT id, decision_number, decision_date, full_text, "
        " length(full_text) as chars, subject_categories "
        "FROM style_corpus ORDER BY decision_date NULLS LAST, created_at"
    )
    patterns = await conn.fetch(
        "SELECT id, pattern_type, pattern_text, context "
        "FROM style_patterns WHERE frequency > 0"
    )
    if not decisions or not patterns:
        return {
            "growth_curve": [],
            "decision_contributions": [],
            "total_patterns": 0,
            "headline": "אין עדיין מספיק נתונים",
        }

    # Normalize texts once (a NULL full_text is treated as empty).
    normalized_decisions = [
        (d["id"], _strip_nikud(d["full_text"] or "")) for d in decisions
    ]

    # For each pattern: the first decision (chronologically) that contains it
    # and the full set of decisions that contain it.
    pattern_info: dict = {}
    for p in patterns:
        variants = _extract_pattern_variants(_strip_nikud(p["pattern_text"] or ""))
        if not variants:
            continue
        matches = [
            dec_id
            for dec_id, text in normalized_decisions
            if any(v in text for v in variants)
        ]
        if matches:
            pattern_info[p["id"]] = {
                "first": matches[0],
                "all": set(matches),
                "type": p["pattern_type"],
                "text": p["pattern_text"],
                "context": p["context"] or "",
            }

    # Index the patterns by decision once instead of rescanning every pattern
    # for every decision.
    new_by_decision: dict = {}
    confirmed_by_decision: dict = {}
    for info in pattern_info.values():
        new_by_decision.setdefault(info["first"], []).append(info)
        for dec_id in info["all"]:
            if dec_id != info["first"]:
                confirmed_by_decision[dec_id] = confirmed_by_decision.get(dec_id, 0) + 1

    decision_contributions = []
    growth_curve = []
    cumulative = 0
    for d in decisions:
        dec_id = d["id"]
        new_patterns = new_by_decision.get(dec_id, [])
        # The first new pattern doubles as the decision's "highlight".
        highlight = new_patterns[0] if new_patterns else None

        subjects = d["subject_categories"]
        if isinstance(subjects, str):
            try:
                subjects = json.loads(subjects)
            except ValueError:
                subjects = []

        decision_contributions.append({
            "decision_number": d["decision_number"] or "",
            "decision_date": str(d["decision_date"]) if d["decision_date"] else "",
            "chars": d["chars"],
            "subjects": subjects or [],
            "new_count": len(new_patterns),
            "confirmed_count": confirmed_by_decision.get(dec_id, 0),
            "new_patterns": [
                {"type": p["type"], "text": p["text"], "context": p["context"]}
                for p in new_patterns[:10]  # cap to keep payload small
            ],
            "highlight": (
                {"type": highlight["type"], "text": highlight["text"]}
                if highlight else None
            ),
        })
        # Every pattern is "new" for exactly one decision, so the running sum
        # equals the number of distinct patterns seen so far.
        cumulative += len(new_patterns)
        growth_curve.append({
            "decision_number": d["decision_number"] or "",
            "date": str(d["decision_date"]) if d["decision_date"] else "",
            "cumulative": cumulative,
        })

    # Headline: after how many decisions were 85% of the patterns seen?
    total_patterns = len(pattern_info)
    # Integer ceiling of 0.85 * total. Flooring would let e.g. 8 of 10 pass as
    # "85%", and would give a threshold of 0 when nothing matched at all.
    threshold = (total_patterns * 85 + 99) // 100
    n_decisions_to_85pct = None
    if total_patterns:
        for i, point in enumerate(growth_curve, 1):
            if point["cumulative"] >= threshold:
                n_decisions_to_85pct = i
                break
    if n_decisions_to_85pct:
        headline = (
            f"אחרי {n_decisions_to_85pct} החלטות כבר למדתי 85% "
            f"מהסגנון שלך — השאר מיקד וחידד את הידע"
        )
    else:
        headline = f"למדתי {total_patterns} דפוסים מ-{len(decisions)} החלטות"
    return {
        "growth_curve": growth_curve,
        "decision_contributions": decision_contributions,
        "total_patterns": total_patterns,
        "headline": headline,
    }
@app.get("/api/training/style-report")
async def training_style_report():
    """Visual dashboard data for Dafna's Style Portrait page.

    Runs the four report sections in sequence on a single pooled connection
    and returns them under ``corpus``, ``anatomy``, ``signature_phrases`` and
    ``contribution``.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        # Dict displays evaluate in order, so the sections run sequentially.
        report = {
            "corpus": await _compute_corpus_stats(conn),
            "anatomy": await _compute_anatomy(conn),
            "signature_phrases": await _compute_signature_phrases(conn),
            "contribution": await _compute_contribution(conn),
        }
    return report
@app.get("/api/training/compare")
async def training_compare(a: str, b: str):
    """Compare two decisions from style_corpus by ID.

    Returns side-by-side data: basic metadata, length, section breakdown,
    which patterns appear in each, shared/unique patterns.

    Args:
        a: UUID (as text) of the first style_corpus row.
        b: UUID (as text) of the second style_corpus row.

    Raises:
        HTTPException: 400 when either id is not a valid UUID; 404 when the
            query does not return exactly two rows.
    """
    try:
        ida, idb = UUID(a), UUID(b)
    except ValueError:
        raise HTTPException(400, "invalid id(s)") from None

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT id, decision_number, decision_date, subject_categories, "
            " full_text, length(full_text) as chars "
            "FROM style_corpus WHERE id = ANY($1::uuid[])",
            [ida, idb],
        )
        if len(rows) != 2:
            raise HTTPException(404, "אחת ההחלטות לא נמצאה")
        by_id = {r["id"]: r for r in rows}
        row_a = by_id[ida]
        row_b = by_id[idb]
        patterns = await conn.fetch(
            "SELECT id, pattern_type, pattern_text, context "
            "FROM style_patterns WHERE frequency > 0"
        )

        # Section breakdown via document_chunks.
        # decision_number format is "NNNN/YY" but document titles are like
        # "[קורפוס] ARAR-YY-NNNN - ..." so we match on the number segment only.
        async def section_stats(corpus_row):
            nm = corpus_row["decision_number"]
            if not nm:
                return []
            # Extract the first numeric segment (e.g., "1188" from "1188/23")
            num_match = re.match(r"(\d{3,4})", nm)
            num = num_match.group(1) if num_match else nm
            rows2 = await conn.fetch(
                "SELECT dc.section_type, sum(length(dc.content))::int as chars "
                "FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
                "WHERE d.title LIKE '[קורפוס]%' "
                " AND (d.title LIKE $1 OR d.title LIKE $2) "
                " AND dc.section_type IS NOT NULL "
                "GROUP BY dc.section_type ORDER BY chars DESC",
                f"%{num}%",
                f"%{nm}%",
            )
            return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]

        sections_a = await section_stats(row_a)
        sections_b = await section_stats(row_b)

    # Pattern matching uses the module-level helpers so this endpoint and the
    # style report apply identical matching rules.
    text_a = _strip_nikud(row_a["full_text"] or "")
    text_b = _strip_nikud(row_b["full_text"] or "")
    in_a, in_b = [], []
    for p in patterns:
        variants = _extract_pattern_variants(_strip_nikud(p["pattern_text"] or ""))
        if not variants:
            continue
        entry = {
            "id": str(p["id"]),
            "type": p["pattern_type"],
            "text": p["pattern_text"],
            "context": p["context"] or "",
        }
        if any(v in text_a for v in variants):
            in_a.append(entry)
        if any(v in text_b for v in variants):
            in_b.append(entry)

    ids_a = {p["id"] for p in in_a}
    ids_b = {p["id"] for p in in_b}

    def serialize(row, sections, patterns_list):
        """Build the per-decision half of the comparison payload."""
        cats = row["subject_categories"]
        if isinstance(cats, str):
            try:
                cats = json.loads(cats)
            except Exception:
                cats = []
        return {
            "id": str(row["id"]),
            "decision_number": row["decision_number"] or "",
            "decision_date": str(row["decision_date"]) if row["decision_date"] else "",
            "chars": row["chars"],
            "subjects": cats or [],
            "sections": sections,
            "patterns_count": len(patterns_list),
        }

    return {
        "a": serialize(row_a, sections_a, in_a),
        "b": serialize(row_b, sections_b, in_b),
        "shared": [p for p in in_a if p["id"] in ids_b],
        "only_a": [p for p in in_a if p["id"] not in ids_b],
        "only_b": [p for p in in_b if p["id"] not in ids_a],
    }
@app.delete("/api/training/corpus/{corpus_id}")
async def training_corpus_delete(corpus_id: str):
    """Remove a decision from the style corpus.

    Raises:
        HTTPException: 400 when ``corpus_id`` is not a valid UUID; 404 when
            the database layer reports that nothing was deleted.
    """
    try:
        cid = UUID(corpus_id)
    except ValueError:
        raise HTTPException(400, "invalid corpus_id")
    outcome = await db.delete_from_style_corpus(cid)
    if outcome.get("deleted"):
        return outcome
    raise HTTPException(404, outcome.get("reason", "not found"))
def _format_legal_citation(decision_number: str, decision_date: str) -> str:
    """Build the appeals-committee citation label from corpus metadata.

    Example: ``("1130-25", "2026-04-26")`` gives
    "ערר 1130-25 ועדת ערר ירושלים (26.4.2026)". Without a decision number the
    result is an empty string; a missing or non-ISO date simply omits the
    parenthesised date.
    """
    if not decision_number:
        return ""
    citation = " ".join([f"ערר {decision_number}", "ועדת ערר ירושלים"])
    if not decision_date:
        return citation
    try:
        parsed = date_type.fromisoformat(decision_date)
    except ValueError:
        # Unparseable date: fall back to the citation without a date.
        return citation
    return f"{citation} ({parsed.day}.{parsed.month}.{parsed.year})"
# Appellant-side patterns, tried in order; group 1 is the party text.
_PARTIES_PATTERNS = (
    # "העורר: X" or "העוררים: X". Captures up to a newline / end of stanza.
    re.compile(r"העורר(?:ים|ת)?[:\s]+([^\n]{3,120})"),
    re.compile(r"המבקש(?:ים|ת)?[:\s]+([^\n]{3,120})"),
    re.compile(r"בעניין[:\s]+([^\n]{3,120})"),
)
# Respondent-side patterns, tried in order; group 1 is the party text.
_RESPONDENT_PATTERNS = (
    re.compile(r"המשיב(?:ים|ה|ות)?[:\s]+([^\n]{3,120})"),
    # "נגד" on its own line: the party is taken from the next non-empty line.
    re.compile(r"נגד\s*\n+\s*([^\n]{3,120})"),
)
def _extract_parties(text: str) -> dict[str, str]:
    """Best-effort regex extraction of appellant/respondent names.

    Only the first 5000 characters are scanned. For each side the patterns are
    tried in order and the first hit wins. A miss yields an empty string, so
    the result is a display hint rather than authoritative data.
    """
    head = (text or "")[:5000]

    def first_capture(patterns) -> str:
        """Return the trimmed group 1 of the first matching pattern, or ""."""
        for pattern in patterns:
            hit = pattern.search(head)
            if hit:
                return hit.group(1).strip(" .,-—")
        return ""

    return {
        "appellant": first_capture(_PARTIES_PATTERNS),
        "respondent": first_capture(_RESPONDENT_PATTERNS),
    }
@app.get("/api/training/corpus")
async def training_corpus_list():
    """List all decisions currently in the style corpus, with enriched metadata.

    Each ``style_corpus`` row is LEFT JOINed to ``documents`` on
    ``sc.document_id``; rows without a linked document report ``page_count`` 0
    and an empty ``doc_title``. Parties are guessed from the first 5000
    characters of the text, and lesson counts come from the database layer.

    NOTE(review): this query joins on the foreign key only — rows whose
    ``document_id`` is NULL are not resolved by any title-based fallback here.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT sc.id,
                   sc.decision_number,
                   sc.decision_date,
                   sc.subject_categories,
                   length(sc.full_text) AS chars,
                   substring(sc.full_text from 1 for 5000) AS head_text,
                   sc.summary,
                   sc.outcome,
                   sc.key_principles,
                   sc.appeal_subtype,
                   sc.practice_area,
                   sc.document_id,
                   sc.created_at,
                   d.page_count AS page_count,
                   d.title AS doc_title
            FROM style_corpus sc
            LEFT JOIN documents d ON d.id = sc.document_id
            ORDER BY sc.created_at DESC
            """
        )
    lessons_counts = await db.count_decision_lessons_per_corpus()

    def as_list(value):
        """Decode a JSON-string column; undecodable or empty values give []."""
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except json.JSONDecodeError:
                value = []
        return value or []

    listing = []
    for row in rows:
        corpus_id = str(row["id"])
        number = row["decision_number"] or ""
        decision_date = str(row["decision_date"]) if row["decision_date"] else ""
        created = row["created_at"]
        linked_doc = row["document_id"]
        listing.append({
            "id": corpus_id,
            "decision_number": number,
            "decision_date": decision_date,
            "subject_categories": as_list(row["subject_categories"]),
            "chars": row["chars"],
            "created_at": created.isoformat() if created else "",
            # ── enriched fields ──
            "summary": row["summary"] or "",
            "outcome": row["outcome"] or "",
            "key_principles": as_list(row["key_principles"]),
            "appeal_subtype": row["appeal_subtype"] or "",
            "practice_area": row["practice_area"] or "",
            "page_count": row["page_count"] or 0,
            "document_id": str(linked_doc) if linked_doc else None,
            "doc_title": row["doc_title"] or "",
            "parties": _extract_parties(row["head_text"] or ""),
            "legal_citation": _format_legal_citation(number, decision_date),
            "lessons_count": lessons_counts.get(corpus_id, 0),
        })
    return listing
# ── Style-agent chat (delegated to legal-chat-service on host) ─────
class ChatConversationCreate(BaseModel):
    """Request body for creating a style-agent chat conversation."""

    # Display title; the default is the Hebrew for "new conversation".
    title: str = "שיחה חדשה"
    # Optional style_corpus id (as a string) — scopes the chat to one decision.
    style_corpus_id: str | None = None
class ChatMessageRequest(BaseModel):
    """Request body carrying a single user chat message."""

    # Raw message text; the send endpoint strips it and rejects empty values.
    content: str
def _conv_to_json(row: dict) -> dict:
    """Turn a chat_conversations row into its JSON-safe API shape.

    ``id`` is required; every other column is read leniently so a row
    that lacks it (or holds a falsy value) serializes to an empty
    string / ``None`` / ``0`` instead of raising.
    """
    corpus_id = row.get("style_corpus_id")
    created = row.get("created_at")
    last_message = row.get("last_message_at")

    payload = {"id": str(row["id"])}
    payload["title"] = row.get("title") or ""
    payload["style_corpus_id"] = str(corpus_id) if corpus_id else None
    payload["decision_number"] = row.get("decision_number") or ""
    payload["claude_session_id"] = row.get("claude_session_id")
    payload["message_count"] = row.get("message_count", 0)
    # Timestamps go out as ISO-8601 strings; a missing one becomes "".
    payload["created_at"] = created.isoformat() if created else ""
    payload["last_message_at"] = last_message.isoformat() if last_message else ""
    return payload
def _msg_to_json(row: dict) -> dict:
    """Turn a chat message row into its JSON-safe API shape.

    ``id``, ``role`` and ``content`` are required keys; ``created_at`` is
    optional and serializes to "" when absent or falsy.
    """
    created = row.get("created_at")
    created_iso = created.isoformat() if created else ""
    return dict(
        id=str(row["id"]),
        role=row["role"],
        content=row["content"],
        created_at=created_iso,
    )
@app.post("/api/training/chat/conversations")
async def chat_create_conversation(body: ChatConversationCreate):
    """Create a new style-agent chat conversation.

    Responds 400 when ``style_corpus_id`` is present but not a valid UUID,
    and 500 when the DB layer returns no row.
    """
    scoped_to: UUID | None = None
    raw_corpus_id = body.style_corpus_id
    if raw_corpus_id:
        try:
            scoped_to = UUID(raw_corpus_id)
        except ValueError:
            raise HTTPException(400, "invalid style_corpus_id")

    # A whitespace-only title falls back to the default title.
    title = body.title.strip() or "שיחה חדשה"
    conversation = await db.create_chat_conversation(
        title=title,
        style_corpus_id=scoped_to,
    )
    if conversation:
        return _conv_to_json(conversation)
    raise HTTPException(500, "failed to create conversation")
@app.get("/api/training/chat/conversations")
async def chat_list_conversations(limit: int = 50):
    """List chat conversations (at most ``limit``), serialized for the API."""
    conversations = await db.list_chat_conversations(limit=limit)
    return list(map(_conv_to_json, conversations))
@app.get("/api/training/chat/conversations/{conv_id}")
async def chat_get_conversation(conv_id: str):
    """Return one conversation together with its messages.

    Responds 400 for a malformed ``conv_id`` and 404 when no such
    conversation exists.
    """
    try:
        conversation_id = UUID(conv_id)
    except ValueError:
        raise HTTPException(400, "invalid conv_id")

    conversation = await db.get_chat_conversation(conversation_id)
    if not conversation:
        raise HTTPException(404, "conversation not found")

    history = await db.list_chat_messages(conversation_id)
    serialized_messages = [_msg_to_json(message) for message in history]
    return {
        "conversation": _conv_to_json(conversation),
        "messages": serialized_messages,
    }
@app.delete("/api/training/chat/conversations/{conv_id}")
async def chat_delete_conversation(conv_id: str):
    """Delete a chat conversation and return the DB layer's result dict.

    Responds 400 for a malformed ``conv_id`` and 404 when the DB layer
    reports nothing was deleted.
    """
    try:
        conversation_id = UUID(conv_id)
    except ValueError:
        raise HTTPException(400, "invalid conv_id")

    outcome = await db.delete_chat_conversation(conversation_id)
    if outcome.get("deleted"):
        return outcome
    raise HTTPException(404, "conversation not found")
@app.post("/api/training/chat/conversations/{conv_id}/messages")
async def chat_send_message(conv_id: str, body: ChatMessageRequest):
    """Send a user message; stream the assistant response as SSE.

    Delegates to ``web.chat_proxy.stream_chat_message``, which proxies to
    the legal-chat-service running on the host. Responds 400 for a
    malformed ``conv_id`` or for empty / whitespace-only content.
    """
    try:
        conversation_id = UUID(conv_id)
    except ValueError:
        raise HTTPException(400, "invalid conv_id")

    message = (body.content or "").strip()
    if message == "":
        raise HTTPException(400, "content is required")

    # Imported at call time rather than at module import.
    from web import chat_proxy

    return await chat_proxy.stream_chat_message(conversation_id, message)
@app.get("/api/training/chat/health")
async def chat_health():
    """Probe legal-chat-service liveness from inside the container.

    Useful when the UI wants to degrade gracefully (show a "chat service
    is unavailable" notice) instead of letting messages fail mid-stream.
    Any failure while probing is reported as ``reachable: False`` with the
    error text rather than raised.
    """
    from web import chat_proxy

    try:
        probe_timeout = httpx.Timeout(5.0)
        async with httpx.AsyncClient(timeout=probe_timeout) as client:
            resp = await client.get(f"{chat_proxy.CHAT_SERVICE_URL}/health")
            code = resp.status_code
            return {
                "reachable": code == 200,
                "status": code,
                "url": chat_proxy.CHAT_SERVICE_URL,
            }
    except Exception as e:
        # Best-effort probe: every error means "not reachable".
        return {
            "reachable": False,
            "error": str(e),
            "url": chat_proxy.CHAT_SERVICE_URL,
        }
# ── Curator portrait — read prompt + stats + accept proposals ──────
# The curator agent's prompt is symlinked into Paperclip, but the source
# lives in the legal-ai repo. Resolve via env so the container (where the
# agent file is mounted from a different path) and the host both work.
# Directory holding the agent prompt files. Overridable via AGENTS_DIR;
# defaults to <parent of this file's directory>/.claude/agents.
_AGENTS_DIR = Path(
    os.getenv("AGENTS_DIR")
    or str(Path(__file__).resolve().parents[1] / ".claude" / "agents")
) if "AGENTS_DIR" not in os.environ or os.environ["AGENTS_DIR"] else Path(
    os.environ["AGENTS_DIR"]
)
# Directory where curator prompt-change proposals are written. Overridable
# via CURATOR_PROPOSALS_DIR; defaults to <parent dir>/data/curator-proposals.
_CURATOR_PROPOSALS_DIR = Path(
    os.getenv(
        "CURATOR_PROPOSALS_DIR",
        str(Path(__file__).resolve().parents[1] / "data" / "curator-proposals"),
    )
)
# Base URL of the repository on Gitea, used to build source links.
_GITEA_REPO_BASE = os.getenv(
    "GITEA_REPO_BASE",
    "https://gitea.nautilus.marcusgroup.org/ezer-mishpati/legal-ai",
)
@app.get("/api/training/curator/prompt")
async def get_curator_prompt():
    """Return the hermes-curator agent's prompt (read-only) + Gitea source URL.

    The file is the canonical source of how the curator analyzes Daphna's
    final decisions. Changes go through git/Gitea, not the UI — the UI only
    surfaces it for transparency.

    Responds 404 when the prompt file is missing and 500 when it exists but
    cannot be read.
    """
    prompt_path = _AGENTS_DIR / "hermes-curator.md"
    if not prompt_path.exists():
        raise HTTPException(404, f"curator prompt not found at {prompt_path}")

    try:
        content = prompt_path.read_text(encoding="utf-8")
        info = prompt_path.stat()
    except OSError as e:
        raise HTTPException(500, f"failed to read curator prompt: {e}")

    return {
        "content": content,
        "filename": prompt_path.name,
        "bytes": info.st_size,
        "last_modified": info.st_mtime,
        "gitea_url": f"{_GITEA_REPO_BASE}/src/branch/main/.claude/agents/hermes-curator.md",
    }
@app.get("/api/training/curator/style-analyzer-prompt")
async def get_style_analyzer_prompt():
    """Return the prompts that style_analyzer.py uses to extract patterns.

    Surfaces the *training-time* prompts so the chair can compare them
    against the curator's post-export prompt; both are shown side by side
    in the curator-portrait tab.

    Responds 500 when the service module cannot be imported or lacks one of
    the expected attributes.
    """
    # The service module is imported here, inside the handler, so that an
    # import failure turns into an HTTP 500 for this endpoint only. The
    # prompts come from mcp-server/src/legal_mcp/services/style_analyzer.py.
    try:
        from legal_mcp.services import style_analyzer

        prompts = {
            "analysis_prompt": style_analyzer.ANALYSIS_PROMPT,
            "single_decision_prompt": style_analyzer.SINGLE_DECISION_PROMPT,
            "synthesis_prompt": style_analyzer.SYNTHESIS_PROMPT,
            "max_input_tokens": style_analyzer.MAX_INPUT_TOKENS,
        }
    except Exception as e:
        raise HTTPException(500, f"failed to load style_analyzer prompt: {e}")
    return prompts
@app.get("/api/training/curator/stats")
async def get_curator_stats():
    """Cheap aggregate stats over decision_lessons + style_corpus.

    Used by the Curator-Portrait tab to show a line such as "10 curator
    findings across 24 decisions". Aggregation happens server-side so the
    UI can render a single card without fanning out N queries.
    """
    curator_filter = "WHERE source = 'curator'"
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        findings_total = await conn.fetchval(
            "SELECT count(*) FROM decision_lessons " + curator_filter
        )
        decisions_hit = await conn.fetchval(
            "SELECT count(DISTINCT style_corpus_id) FROM decision_lessons "
            + curator_filter
        )
        corpus_total = await conn.fetchval("SELECT count(*) FROM style_corpus")
        findings_applied = await conn.fetchval(
            "SELECT count(*) FROM decision_lessons "
            + curator_filter
            + " AND applied_to_skill = true"
        )
        # Last 10 curator findings — newest first
        latest = await conn.fetch(
            """
            SELECT dl.id, dl.lesson_text, dl.category, dl.applied_to_skill,
                   dl.created_at,
                   sc.decision_number, sc.decision_date
            FROM decision_lessons dl
            JOIN style_corpus sc ON sc.id = dl.style_corpus_id
            WHERE dl.source = 'curator'
            ORDER BY dl.created_at DESC
            LIMIT 10
            """
        )

    recent_findings = []
    for finding in latest:
        decided_on = finding["decision_date"]
        created = finding["created_at"]
        recent_findings.append({
            "id": str(finding["id"]),
            "lesson_text": finding["lesson_text"],
            "category": finding["category"],
            "applied_to_skill": bool(finding["applied_to_skill"]),
            "decision_number": finding["decision_number"] or "",
            "decision_date": str(decided_on) if decided_on else "",
            "created_at": created.isoformat() if created else "",
        })

    return {
        "total_findings": findings_total or 0,
        "decisions_with_findings": decisions_hit or 0,
        "decisions_total": corpus_total or 0,
        "findings_applied": findings_applied or 0,
        "recent_findings": recent_findings,
    }
class CuratorProposal(BaseModel):
    """Request body for proposing a change to the curator prompt."""

    # Short title; also used to derive the proposal's filename.
    title: str
    # Markdown — what to change in the prompt.
    proposed_change: str
    # Markdown — why the change is wanted.
    rationale: str
@app.post("/api/training/curator/proposals")
async def create_curator_proposal(body: CuratorProposal):
    """Save a proposed change to the curator prompt as a file on disk.

    No automatic commit, no overwrite — the chair (chaim) reviews the
    file manually and applies it through git. This is intentional: the
    prompt is too load-bearing to mutate from a web UI.

    The file is created in exclusive mode, so two requests that pick the
    same name cannot overwrite one another: the loser of the race moves on
    to the next numeric suffix.

    Responds 400 when ``title`` or ``proposed_change`` is blank and 500
    when the proposals directory or the file cannot be written.
    """
    title = (body.title or "").strip()
    if not title:
        raise HTTPException(400, "title is required")
    if not body.proposed_change.strip():
        raise HTTPException(400, "proposed_change is required")

    # Slug-ish filename — strip anything that isn't a Hebrew letter, ASCII
    # letter, digit, hyphen, or underscore. Hebrew letters are explicitly
    # allowed because most proposals will be in Hebrew.
    slug = re.sub(r"[^\w֐-׿\-]+", "-", title)[:60].strip("-_") or "proposal"
    today = date_type.today().isoformat()
    md = (
        f"# הצעת שינוי לפרומפט hermes-curator\n\n"
        f"- **תאריך:** {today}\n"
        f"- **כותרת:** {title}\n\n"
        f"## שינוי מוצע\n\n{body.proposed_change.strip()}\n\n"
        f"## נימוק\n\n{body.rationale.strip() or '(לא ניתן)'}\n"
    )

    try:
        _CURATOR_PROPOSALS_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        raise HTTPException(500, f"failed to write proposal: {e}")

    # First attempt has no suffix; later attempts append -2, -3, ... so a
    # same-day proposal with the same slug is never silently replaced.
    attempt = 1
    while True:
        suffix = "" if attempt == 1 else f"-{attempt}"
        path = _CURATOR_PROPOSALS_DIR / f"{today}-{slug}{suffix}.md"
        try:
            # Mode "x" fails if the file already exists, which makes the
            # existence check and the creation a single atomic step.
            with path.open("x", encoding="utf-8") as fh:
                fh.write(md)
            break
        except FileExistsError:
            attempt += 1
        except OSError as e:
            raise HTTPException(500, f"failed to write proposal: {e}")

    return {
        "saved": True,
        "filename": path.name,
        "path": str(path),
        "bytes": len(md.encode("utf-8")),
    }
@app.get("/api/training/curator/proposals")
async def list_curator_proposals():
    """List proposed-change files in data/curator-proposals/, newest first.

    Only regular files with a ``.md`` suffix (case-insensitive) are
    returned. Each file is stat'ed once; an entry that cannot be stat'ed
    (for example a dangling symlink, or a file removed while the directory
    is being listed) is skipped instead of failing the whole request.
    Returns an empty list when the directory does not exist.
    """
    if not _CURATOR_PROPOSALS_DIR.exists():
        return []

    found = []  # (mtime, filename, size)
    for entry in _CURATOR_PROPOSALS_DIR.iterdir():
        if entry.suffix.lower() != ".md":
            continue
        try:
            if not entry.is_file():
                continue
            info = entry.stat()
        except OSError:
            # Entry vanished or is unreadable — leave it out of the listing.
            continue
        found.append((info.st_mtime, entry.name, info.st_size))

    # Stable sort on mtime only, so ties keep directory-listing order.
    found.sort(key=lambda item: item[0], reverse=True)
    return [
        {"filename": name, "bytes": size, "modified_at": mtime}
        for mtime, name, size in found
    ]
# ── Per-decision lessons (decision_lessons table) ──────────────────
class LessonCreate(BaseModel):
    """Request body for adding a lesson to a corpus decision."""

    # The lesson itself; the create endpoint rejects blank text.
    lesson_text: str
    # Validated by the create endpoint against _LESSON_CATEGORIES.
    category: str = "general"
    # Validated by the create endpoint against _LESSON_SOURCES.
    source: str = "manual"
class LessonPatch(BaseModel):
    """Partial update for a lesson; every field is optional."""

    # New lesson text, or None when not supplied.
    lesson_text: str | None = None
    # New category; when supplied it is validated against _LESSON_CATEGORIES.
    category: str | None = None
    # New value for the applied-to-skill flag, or None when not supplied.
    applied_to_skill: bool | None = None
# Accepted values for a lesson's ``category`` field.
_LESSON_CATEGORIES = {
    "style",
    "structure",
    "lexicon",
    "tabular",
    "general",
}
# Accepted values for a lesson's ``source`` field.
_LESSON_SOURCES = {
    "manual",
    "curator",
    "chair",
    "style_analyzer",
}
def _lesson_to_json(row: dict) -> dict:
    """Turn a decision_lessons row into its JSON-safe API shape.

    Ids become strings, ``applied_to_skill`` is coerced to ``bool``, and
    timestamps become ISO-8601 strings ("" when absent or falsy).
    ``created_by`` defaults to "" only when the key is missing.
    """
    created = row.get("created_at")
    updated = row.get("updated_at")

    payload = {
        "id": str(row["id"]),
        "style_corpus_id": str(row["style_corpus_id"]),
    }
    # These three columns are passed through untouched.
    for column in ("lesson_text", "category", "source"):
        payload[column] = row[column]
    payload["applied_to_skill"] = bool(row["applied_to_skill"])
    payload["created_by"] = row.get("created_by", "")
    payload["created_at"] = created.isoformat() if created else ""
    payload["updated_at"] = updated.isoformat() if updated else ""
    return payload
@app.get("/api/training/corpus/{corpus_id}/lessons")
async def list_corpus_lessons(corpus_id: str):
    """List the lessons attached to one corpus decision.

    Responds 400 when ``corpus_id`` is not a valid UUID.
    """
    try:
        corpus_uuid = UUID(corpus_id)
    except ValueError:
        raise HTTPException(400, "invalid corpus_id")
    lessons = await db.list_decision_lessons(corpus_uuid)
    return list(map(_lesson_to_json, lessons))
@app.post("/api/training/corpus/{corpus_id}/lessons")
async def add_corpus_lesson(corpus_id: str, body: LessonCreate):
    """Attach a new lesson to a corpus decision and return it.

    Responds 400 for a malformed ``corpus_id``, blank ``lesson_text``, or a
    ``category`` / ``source`` outside the allowed sets; 500 when the DB
    layer returns no row.
    """
    try:
        corpus_uuid = UUID(corpus_id)
    except ValueError:
        raise HTTPException(400, "invalid corpus_id")

    lesson_text = (body.lesson_text or "").strip()
    if lesson_text == "":
        raise HTTPException(400, "lesson_text is required")
    if body.category not in _LESSON_CATEGORIES:
        allowed = sorted(_LESSON_CATEGORIES)
        raise HTTPException(400, f"invalid category; allowed: {allowed}")
    if body.source not in _LESSON_SOURCES:
        allowed = sorted(_LESSON_SOURCES)
        raise HTTPException(400, f"invalid source; allowed: {allowed}")

    created = await db.add_decision_lesson(
        corpus_uuid,
        lesson_text=lesson_text,
        category=body.category,
        source=body.source,
    )
    if created:
        return _lesson_to_json(created)
    raise HTTPException(500, "failed to insert lesson")
@app.patch("/api/training/lessons/{lesson_id}")
async def patch_corpus_lesson(lesson_id: str, body: LessonPatch):
    """Partially update a lesson and return the DB layer's result dict.

    Responds 400 for a malformed ``lesson_id`` or a supplied ``category``
    outside the allowed set, and 404 when the DB layer reports the lesson
    as "not found". Any other non-update outcome is returned as-is with
    HTTP 200.
    """
    try:
        lesson_uuid = UUID(lesson_id)
    except ValueError:
        raise HTTPException(400, "invalid lesson_id")

    category = body.category
    if not (category is None or category in _LESSON_CATEGORIES):
        raise HTTPException(400, f"invalid category; allowed: {sorted(_LESSON_CATEGORIES)}")

    outcome = await db.update_decision_lesson(
        lesson_uuid,
        lesson_text=body.lesson_text,
        category=category,
        applied_to_skill=body.applied_to_skill,
    )
    if outcome.get("updated"):
        return outcome
    if outcome.get("reason") == "not found":
        raise HTTPException(404, "lesson not found")
    # "nothing to update" — 200 with reason
    return outcome
@app.delete("/api/training/lessons/{lesson_id}")
async def delete_corpus_lesson(lesson_id: str):
    """Delete a lesson and return the DB layer's result dict.

    Responds 400 for a malformed ``lesson_id`` and 404 when the DB layer
    reports nothing was deleted.
    """
    try:
        lesson_uuid = UUID(lesson_id)
    except ValueError:
        raise HTTPException(400, "invalid lesson_id")

    outcome = await db.delete_decision_lesson(lesson_uuid)
    if outcome.get("deleted"):
        return outcome
    raise HTTPException(404, "lesson not found")
@app.get("/api/training/corpus/{corpus_id}/full-text")
async def training_corpus_full_text(corpus_id: str):
    """Return the proofread full_text for a single corpus row.

    Kept out of the list endpoint because full_text is large (50K-650K
    chars per decision) and the table view only needs counts. The drawer
    fetches it on demand when the chair opens the "content" tab.

    Responds 400 for a malformed ``corpus_id`` and 404 when no row matches.
    """
    try:
        corpus_uuid = UUID(corpus_id)
    except ValueError:
        raise HTTPException(400, "invalid corpus_id")

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            "SELECT decision_number, full_text FROM style_corpus WHERE id = $1",
            corpus_uuid,
        )
    if record is None or not record:
        raise HTTPException(404, "corpus row not found")

    # The id is echoed back exactly as the caller sent it.
    response = {"id": corpus_id}
    response["decision_number"] = record["decision_number"] or ""
    response["full_text"] = record["full_text"] or ""
    return response
class TrainingCorpusPatch(BaseModel):
    """Editable metadata fields on a style_corpus row.

    full_text is intentionally NOT editable — the corpus is write-once.
    For corrections, re-upload the decision via /api/training/upload.
    A field left as ``None`` is treated as "not provided" by the patch
    endpoint and is not touched.
    """

    decision_number: str | None = None
    # ISO YYYY-MM-DD, or "" to clear the stored date.
    decision_date: str | None = None
    # Stored in a JSONB column.
    subject_categories: list[str] | None = None
    summary: str | None = None
    outcome: str | None = None
    # Stored in a JSONB column.
    key_principles: list[str] | None = None
    appeal_subtype: str | None = None
    practice_area: str | None = None
@app.patch("/api/training/corpus/{corpus_id}")
async def training_corpus_patch(corpus_id: str, patch: TrainingCorpusPatch):
    """Update metadata fields on a corpus row. Only provided fields are touched.

    Returns ``{"updated": False, ...}`` when the body carries no fields.
    Responds 400 for a malformed ``corpus_id`` or an unparseable
    ``decision_date``, and 404 when no row matches.
    """
    try:
        corpus_uuid = UUID(corpus_id)
    except ValueError:
        raise HTTPException(400, "invalid corpus_id")

    fields = patch.model_dump(exclude_none=True)
    if len(fields) == 0:
        return {"updated": False, "reason": "no fields to update"}

    # Coerce decision_date "" → SQL NULL, otherwise parse as DATE.
    if "decision_date" in fields:
        raw_date = fields["decision_date"]
        if raw_date == "":
            fields["decision_date"] = None
        else:
            try:
                fields["decision_date"] = date_type.fromisoformat(raw_date)
            except ValueError as e:
                raise HTTPException(400, f"invalid decision_date: {e}")

    # subject_categories + key_principles are JSONB columns, so they are
    # sent as JSON text.
    for jsonb_column in ("subject_categories", "key_principles"):
        if jsonb_column in fields:
            fields[jsonb_column] = json.dumps(fields[jsonb_column])

    # Build a positional UPDATE — asyncpg doesn't support named parameters.
    # $1 is reserved for the row id, so column placeholders start at $2.
    # Column names come from the model's own field names, not from raw
    # request text.
    assignments = [
        f"{column} = ${position}"
        for position, column in enumerate(fields, start=2)
    ]
    set_clause = ", ".join(assignments)
    values = list(fields.values())

    statement = (
        f"UPDATE style_corpus SET {set_clause} "
        "WHERE id = $1 "
        "RETURNING id, decision_number, decision_date, summary, outcome"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        updated_row = await conn.fetchrow(statement, corpus_uuid, *values)
    if not updated_row:
        raise HTTPException(404, "corpus row not found")

    new_date = updated_row["decision_date"]
    return {
        "updated": True,
        "id": str(updated_row["id"]),
        "decision_number": updated_row["decision_number"] or "",
        "decision_date": str(new_date) if new_date else "",
        "summary_len": len(updated_row["summary"] or ""),
        "outcome_len": len(updated_row["outcome"] or ""),
    }
# Headers that defeat proxy buffering for SSE streams. `X-Accel-Buffering: no`
# is honored by nginx/Traefik (and matches what Coolify deploys); without it,
# small text/event-stream chunks are held in HTTP/2 frames until the stream
# closes — which is exactly the bug the previous progress endpoint exhibited.
# Response headers attached to every SSE StreamingResponse in this module.
_SSE_HEADERS = {
    # Keep caches from storing or rewriting the stream.
    "Cache-Control": "no-cache, no-transform",
    # Ask the reverse proxy not to buffer the response.
    "X-Accel-Buffering": "no",
    "Connection": "keep-alive",
}
async def _get_active_tasks() -> list[dict]:
    """Extract active (non-terminal) tasks from the progress store.

    Each task is reduced to a small dict; fields missing from the stored
    data fall back to "unknown" (status) or "" (everything else).
    """
    active_entries = await _progress.active()
    return [
        {
            "task_id": task_id,
            "status": info.get("status", "unknown"),
            "step": info.get("step", ""),
            "filename": info.get("filename", ""),
            "error": info.get("error", ""),
        }
        for task_id, info in active_entries
    ]
@app.get("/api/system/tasks")
async def system_tasks():
    """List all active background tasks (one-shot), with their count."""
    active = await _get_active_tasks()
    return {"active": active, "count": len(active)}
@app.get("/api/system/tasks/stream")
async def system_tasks_stream():
    """SSE stream — pushes active-task snapshots when anything changes.

    Replaces client-side polling. Clients connect once; the task set is
    sampled once per second and a ``tasks`` event is sent only when the
    serialized snapshot differs from the previous one. When nothing has
    been sent for more than 5 seconds, a comment-only heartbeat is emitted
    so the connection does not sit silent.

    Intervals are measured with the monotonic clock, so adjustments to the
    system clock can neither suppress nor burst heartbeats.
    """
    heartbeat_after_seconds = 5

    async def event_gen():
        last_snapshot: str | None = None
        last_sent = time.monotonic()
        while True:
            active = await _get_active_tasks()
            snapshot = json.dumps(
                {"active": active, "count": len(active)}, ensure_ascii=False
            )
            now = time.monotonic()
            if snapshot != last_snapshot:
                yield f"event: tasks\ndata: {snapshot}\n\n"
                last_snapshot = snapshot
                last_sent = now
            elif now - last_sent > heartbeat_after_seconds:
                # A line starting with ":" is an SSE comment; clients ignore it.
                yield ": heartbeat\n\n"
                last_sent = now
            await asyncio.sleep(1)

    return StreamingResponse(
        event_gen(), media_type="text/event-stream", headers=_SSE_HEADERS
    )
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
    """SSE stream of processing progress for a single upload task.

    Behavior:
      • Late subscribers (task already cleaned up) get a terminal
        ``{"status":"unknown"}`` payload and a clean stream close — never
        a 404. EventSource treats 404 as a transient error and reconnects
        forever, leaving the UI stuck on the placeholder; we avoid that.
      • Something is emitted on every iteration — the payload when it
        changed, otherwise a keepalive comment — so the proxy chain always
        has data to flush. The previous 30-second silent tail after
        completion (and the proxy buffering it caused) was the original
        cause of stuck-spinner uploads.
      • Cleanup is delegated to the progress store's TTL (entries expire
        after PROGRESS_TTL_SECONDS), so there is no hand-rolled
        post-completion sleep here.
    """
    terminal_statuses = ("completed", "failed")

    async def event_stream():
        previous: str | None = None
        while True:
            data = await _progress.get(task_id)
            if data is None:
                # Either the task never existed or its TTL expired. Emit
                # a single terminal payload so the client closes cleanly
                # and falls back to refetching the case detail.
                yield f"data: {json.dumps({'status': 'unknown'})}\n\n"
                return

            payload = json.dumps(data, ensure_ascii=False)
            if payload == previous:
                yield ": keepalive\n\n"
            else:
                yield f"data: {payload}\n\n"
                previous = payload

            # The terminal payload has been sent above; end the stream.
            if data.get("status") in terminal_statuses:
                return
            await asyncio.sleep(1)

    return StreamingResponse(
        event_stream(), media_type="text/event-stream", headers=_SSE_HEADERS
    )
@app.get("/health")
@app.get("/api/health")
async def health():
    """Liveness probe, served at both /health and /api/health."""
    return dict(status="ok")
@app.get("/api/cases")
async def list_cases(
    detail: bool = False,
    include_archived: bool = False,
    archived_only: bool = False,
):
    """List existing cases. By default excludes archived (use include_archived=true
    or archived_only=true to see them). With detail=true, includes doc counts.

    In detail mode the document counts for all listed cases are fetched in
    a single grouped query instead of two queries per case. A case with no
    documents reports 0 for both counts.
    """
    cases = await db.list_cases(
        include_archived=include_archived,
        archived_only=archived_only,
    )

    def iso_or_none(ts):
        """ISO-8601 string for a timestamp, or None when it is unset."""
        return ts.isoformat() if ts else None

    if not detail:
        return [
            {
                "case_number": c["case_number"],
                "title": c["title"],
                "status": c["status"],
                "archived_at": iso_or_none(c.get("archived_at")),
                "updated_at": iso_or_none(c.get("updated_at")),
                "practice_area": c.get("practice_area"),
                "appeal_subtype": c.get("appeal_subtype"),
                "proceeding_type": c.get("proceeding_type"),
            }
            for c in cases
        ]

    # Enhanced listing with document counts.
    case_ids = [UUID(c["id"]) for c in cases]
    counts: dict = {}  # case_id -> (document_count, processing_count)
    if case_ids:
        pool = await db.get_pool()
        async with pool.acquire() as conn:
            count_rows = await conn.fetch(
                """
                SELECT case_id,
                       count(*) AS doc_count,
                       count(*) FILTER (
                           WHERE extraction_status NOT IN ('completed', 'proofread')
                       ) AS processing_count
                FROM documents
                WHERE case_id = ANY($1::uuid[])
                GROUP BY case_id
                """,
                case_ids,
            )
        counts = {
            row["case_id"]: (row["doc_count"], row["processing_count"])
            for row in count_rows
        }

    result = []
    for c, case_id in zip(cases, case_ids):
        # Cases without any documents have no row in the grouped result.
        doc_count, processing_count = counts.get(case_id, (0, 0))
        result.append({
            "case_number": c["case_number"],
            "title": c["title"],
            "status": c["status"],
            "subject": c.get("subject", "") or "",
            "expected_outcome": c.get("expected_outcome", ""),
            "committee_type": c.get("committee_type", ""),
            "hearing_date": str(c["hearing_date"]) if c.get("hearing_date") else "",
            "archived_at": iso_or_none(c.get("archived_at")),
            "created_at": iso_or_none(c.get("created_at")),
            "updated_at": iso_or_none(c.get("updated_at")),
            "practice_area": c.get("practice_area"),
            "appeal_subtype": c.get("appeal_subtype"),
            "proceeding_type": c.get("proceeding_type"),
            "document_count": doc_count,
            "processing_count": processing_count,
            "gitea_url": f"https://gitea.nautilus.marcusgroup.org/cases/{c['case_number']}",
        })
    return result
@app.get("/api/cases/stale")
async def api_stale_cases(days: int = 3):
    """Return cases that haven't been updated in N days and are not in a terminal/waiting status.

    A non-positive ``days`` yields an empty result without touching the
    database. Archived cases are excluded.
    """
    if days <= 0:
        return {"cases": [], "total": 0}

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        stale_rows = await conn.fetch(
            """
            SELECT case_number, title, status,
                   EXTRACT(DAY FROM (now() - updated_at))::int AS days_stale
            FROM cases
            WHERE status NOT IN ('final', 'new', 'exported')
              AND archived_at IS NULL
              AND updated_at < now() - make_interval(days => $1)
            ORDER BY updated_at ASC  -- oldest stale first (longest overdue = highest priority)
            """,
            days,
        )

    wanted = ("case_number", "title", "status", "days_stale")
    cases = [{column: row[column] for column in wanted} for row in stale_rows]
    return {"cases": cases, "total": len(cases)}
@app.post("/api/cases/{case_number}/archive")
async def api_archive_case(case_number: str):
    """Move a case to the archive. Also archives the matching Paperclip project.

    The Paperclip sync is best-effort: a failure there is logged and
    reported under ``paperclip`` in the response, but does not fail the
    request. Responds 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"Case {case_number} not found")

    archived = await db.archive_case(UUID(case["id"]))

    paperclip_result: dict = {"status": "skipped"}
    try:
        paperclip_result = await pc_archive_project(case_number)
    except Exception as e:
        logger.warning("paperclip archive sync failed for %s: %s", case_number, e)
        paperclip_result = {"status": "error", "message": str(e)}

    archived_at = None
    if archived and archived.get("archived_at"):
        archived_at = archived["archived_at"].isoformat()
    return {
        "status": "archived",
        "case_number": case_number,
        "archived_at": archived_at,
        "paperclip": paperclip_result,
    }
@app.post("/api/cases/{case_number}/restore")
async def api_restore_case(case_number: str):
    """Restore an archived case. Also restores the matching Paperclip project.

    The Paperclip sync is best-effort: a failure there is logged and
    reported under ``paperclip`` in the response, but does not fail the
    request. Responds 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"Case {case_number} not found")

    await db.restore_case(UUID(case["id"]))

    paperclip_result: dict = {"status": "skipped"}
    try:
        paperclip_result = await pc_restore_project(case_number)
    except Exception as e:
        logger.warning("paperclip restore sync failed for %s: %s", case_number, e)
        paperclip_result = {"status": "error", "message": str(e)}

    response = {"status": "restored", "case_number": case_number}
    response["paperclip"] = paperclip_result
    return response
# ── Paperclip Integration API ─────────────────────────────────────
class CaseCreateRequest(BaseModel):
    """Request body for creating a new appeal case.

    Only ``case_number`` and ``title`` are required.
    """

    case_number: str
    title: str
    appellants: list[str] | None = None
    respondents: list[str] | None = None
    subject: str = ""
    property_address: str = ""
    permit_number: str = ""
    # Default is the Hebrew for "local committee".
    committee_type: str = "ועדה מקומית"
    hearing_date: str = ""
    notes: str = ""
    expected_outcome: str = ""
    # Empty default → cases_tools.case_create auto-derives the domain
    # practice_area from the case_number prefix (1xxx→rishuy_uvniya,
    # 8xxx→betterment_levy, 9xxx→compensation_197). Callers can still
    # send a domain value explicitly.
    practice_area: str = ""
    appeal_subtype: str = ""
    # proceeding_type: 'ערר' / 'בל"מ'. Empty → auto-derived from
    # appeal_subtype / subject downstream.
    proceeding_type: str = ""
class CaseUpdateRequest(BaseModel):
    """Request body for updating an existing case; every field is optional.

    NOTE(review): an empty string / ``None`` presumably means "leave this
    field unchanged" — confirm against cases_tools.case_update.
    """

    # The update endpoint treats a non-empty status that differs from the
    # stored one as a status change and schedules a webhook for it.
    status: str = ""
    # A non-empty title that differs from the stored one also triggers a
    # Paperclip project rename.
    title: str = ""
    subject: str = ""
    notes: str = ""
    hearing_date: str = ""
    decision_date: str = ""
    tags: list[str] | None = None
    expected_outcome: str = ""
    appellants: list[str] | None = None
    respondents: list[str] | None = None
    property_address: str = ""
    permit_number: str = ""
    proceeding_type: str = ""
@app.post("/api/cases/create")
async def api_case_create(req: CaseCreateRequest):
    """Create a new appeal case.

    After the case is created, a Paperclip project is created for it on a
    best-effort basis: success is attached under ``paperclip``, failure
    under ``paperclip_error``. A missing or failed Gitea result is only
    logged.
    """
    raw_result = await cases_tools.case_create(
        case_number=req.case_number,
        title=req.title,
        appellants=req.appellants,
        respondents=req.respondents,
        subject=req.subject,
        property_address=req.property_address,
        permit_number=req.permit_number,
        committee_type=req.committee_type,
        hearing_date=req.hearing_date,
        notes=req.notes,
        expected_outcome=req.expected_outcome,
        practice_area=req.practice_area,
        appeal_subtype=req.appeal_subtype,
        proceeding_type=req.proceeding_type,
    )
    # GAP-48: case_create returns the {status,data,message} envelope; unwrap
    # it to the case object so the gitea/appeal_subtype/paperclip wiring
    # below keeps working.
    parsed = envelope_unwrap(json.loads(raw_result))

    # Auto-create a Paperclip project for the new case. case_create may have
    # auto-derived appeal_subtype from the case-number prefix, so the
    # resolved value wins over the (possibly empty) request value.
    appeal_type = parsed.get("appeal_subtype") or req.appeal_subtype or "רישוי"
    try:
        project = await pc_create_project(
            case_number=req.case_number,
            title=req.title,
            appeal_type=appeal_type,
        )
        parsed["paperclip"] = project
        logger.info(
            "Auto-created Paperclip project for case %s: %s",
            req.case_number, project.get("url"),
        )
    except Exception as e:
        logger.warning(
            "Failed to auto-create Paperclip project for case %s: %s",
            req.case_number, e,
        )
        parsed["paperclip_error"] = str(e)

    # The Gitea result was attached by case_create itself; log a failure so
    # ops can spot stale-token issues without scanning every response.
    gitea_outcome = parsed.get("gitea") or {}
    if not gitea_outcome.get("ok"):
        logger.warning(
            "Gitea repo not created for case %s: %s",
            req.case_number, gitea_outcome.get("error") or "unknown",
        )
    return parsed
@app.get("/api/cases/{case_number}/git-status")
async def api_case_git_status(case_number: str):
    """Git sync status for a case repo.

    Returns ``{"synced": False, "error": "no_repo"}`` when the case
    directory has no ``.git`` entry. Otherwise reports the last commit's
    time and subject, the number of dirty files, whether an ``origin``
    remote exists (with any credentials stripped from its URL), and how
    many commits are not on any remote. ``synced`` is true only when a
    remote exists, the tree is clean, and nothing is ahead.

    The git commands run on a worker thread via ``asyncio.to_thread`` so
    the blocking subprocess calls do not stall the event loop.
    """
    case_dir = config.find_case_dir(case_number)
    if not (case_dir / ".git").exists():
        return {"synced": False, "error": "no_repo"}

    env = {
        "PATH": os.environ.get("PATH", "/usr/bin:/bin"),
        "HOME": os.environ.get("HOME", "/root"),
        "GIT_TERMINAL_PROMPT": "0",
        "GIT_CONFIG_GLOBAL": "/dev/null",
        # Ensure git trusts the case directory regardless of ownership.
        "GIT_CONFIG_COUNT": "1",
        "GIT_CONFIG_KEY_0": "safe.directory",
        "GIT_CONFIG_VALUE_0": str(case_dir),
    }

    async def run_git(*args: str) -> subprocess.CompletedProcess:
        """Run one git command in the case directory, off the event loop."""
        return await asyncio.to_thread(
            subprocess.run,
            ["git", *args],
            cwd=case_dir, capture_output=True, text=True, env=env,
        )

    # Last commit info: hash, author date (ISO), subject — one per line.
    log = await run_git("log", "-1", "--format=%H%n%aI%n%s")
    lines = log.stdout.strip().splitlines() if log.returncode == 0 else []
    last_commit_time = lines[1] if len(lines) > 1 else None
    last_commit_msg = lines[2] if len(lines) > 2 else None

    # Dirty files count.
    status = await run_git("status", "--porcelain")
    dirty = 0
    if status.returncode == 0:
        dirty = sum(1 for entry in status.stdout.splitlines() if entry.strip())

    # Check if a remote exists and whether we're ahead of it.
    has_remote = False
    ahead = 0
    remote_url = None
    remote_check = await run_git("remote", "get-url", "origin")
    if remote_check.returncode == 0:
        has_remote = True
        # Sanitize token from URL: keep only what follows the last "@".
        raw = remote_check.stdout.strip()
        remote_url = raw.split("@")[-1] if "@" in raw else raw
        ahead_check = await run_git(
            "rev-list", "HEAD", "--not", "--remotes", "--count"
        )
        if ahead_check.returncode == 0:
            ahead = int(ahead_check.stdout.strip() or "0")

    synced = has_remote and dirty == 0 and ahead == 0
    return {
        "synced": synced,
        "has_remote": has_remote,
        "remote_url": remote_url,
        "dirty_files": dirty,
        "commits_ahead": ahead,
        "last_commit_time": last_commit_time,
        "last_commit_msg": last_commit_msg,
    }
@app.get("/api/cases/{case_number}/details")
async def api_case_get(case_number: str):
    """Get full case details including documents.

    Responds 404 when the tool returns an error envelope.
    """
    raw_result = await cases_tools.case_get(case_number)
    envelope = json.loads(raw_result)
    is_error = isinstance(envelope, dict) and envelope.get("status") == "error"  # GAP-48
    if is_error:
        raise HTTPException(404, envelope.get("message") or raw_result)
    return envelope_unwrap(envelope)
@app.put("/api/cases/{case_number}")
async def api_case_update(case_number: str, req: CaseUpdateRequest, background_tasks: BackgroundTasks):
    """Update case details.

    Side effects, both scheduled as background tasks after a successful
    update:
      • a Paperclip project rename when a non-empty title differs from the
        stored one;
      • a case-status webhook when a non-empty status differs from the
        stored one.

    Responds 422 when the tool raises ``ValueError`` and 404 when it
    returns an error envelope.
    """
    # Capture the stored case before the update so changes can be detected.
    existing = await db.get_case_by_number(case_number)
    old_status = (existing or {}).get("status", "")
    try:
        result = await cases_tools.case_update(
            case_number=case_number,
            status=req.status,
            title=req.title,
            subject=req.subject,
            notes=req.notes,
            hearing_date=req.hearing_date,
            decision_date=req.decision_date,
            tags=req.tags,
            expected_outcome=req.expected_outcome,
            appellants=req.appellants,
            respondents=req.respondents,
            property_address=req.property_address,
            permit_number=req.permit_number,
            proceeding_type=req.proceeding_type,
        )
    except ValueError as exc:
        raise HTTPException(422, str(exc))
    parsed = json.loads(result)
    if isinstance(parsed, dict) and parsed.get("status") == "error":  # GAP-48
        raise HTTPException(404, parsed.get("message") or result)
    parsed = envelope_unwrap(parsed)

    # Paperclip sync: update project name when title changes (fire-and-forget).
    old_title = (existing or {}).get("title", "")
    if req.title and req.title != old_title:
        background_tasks.add_task(
            paperclip_client.update_project_name,
            case_number=case_number,
            new_title=req.title,
        )

    # Emit webhook when status changes (fire-and-forget via BackgroundTasks).
    new_status = req.status
    if new_status and old_status != new_status:
        # The company is picked from the first character of the case number.
        prefix = case_number[:1]
        company_id = (
            PAPERCLIP_COMPANIES["licensing"] if prefix == "1"
            else PAPERCLIP_COMPANIES["betterment"] if prefix in ("8", "9")
            else None
        )
        background_tasks.add_task(
            emit_case_status_webhook,
            case_number=case_number,
            old_status=old_status,
            new_status=new_status,
            company_id=company_id,  # None is safe — plugin handles unknown company gracefully
        )
        # Separator between old and new status keeps the log line readable.
        logger.debug(
            "webhook scheduled: case %s %s -> %s",
            case_number, old_status, new_status,
        )
    return parsed
@app.delete("/api/cases")
async def api_case_delete(case_number: str, remove_files: bool = False):
    """Delete a case, identified by case_number in the query string.

    Uses a query param (not a path segment) because case numbers may contain
    characters like `/` that FastAPI path routing cannot capture even when
    URL-encoded (%2F). Dependent documents/chunks/qa_results cascade via
    FK ON DELETE CASCADE; audit_log rows nullify their case_id.

    Pass `remove_files=true` to also rm -rf the on-disk case directory.
    Responds 404 when the tool does not report the case as deleted.
    """
    raw_result = await cases_tools.case_delete(case_number, remove_files)
    outcome = envelope_unwrap(json.loads(raw_result))  # GAP-48
    if outcome.get("deleted"):
        return outcome
    detail = outcome.get("message") or outcome.get("reason") or f"תיק {case_number} לא נמצא"
    raise HTTPException(404, detail)
@app.get("/api/cases/{case_number}/status")
async def api_case_status(case_number: str):
    """Get full workflow status for a case.

    Responds 404 when the tool returns an error envelope.
    """
    raw_result = await workflow_tools.workflow_status(case_number)
    envelope = json.loads(raw_result)
    is_error = isinstance(envelope, dict) and envelope.get("status") == "error"  # GAP-48
    if is_error:
        raise HTTPException(404, envelope.get("message") or raw_result)
    return envelope_unwrap(envelope)
@app.get("/api/search")
async def api_search(query: str, limit: int = 10, section_type: str = ""):
    """Run a semantic search across decisions and documents.

    Returns the unwrapped tool payload, or ``{"message": <raw text>}`` when
    the tool output is not valid JSON.
    """
    raw = await search_tools.search_decisions(query, limit, section_type)
    try:
        # GAP-48: the tool returns the {status,data,message} envelope; unwrap
        # it to preserve the legacy API shape (list on hits, {"message"} otherwise).
        response = envelope_unwrap(json.loads(raw))
    except json.JSONDecodeError:
        response = {"message": raw}
    return response
@app.get("/api/cases/{case_number}/search")
async def api_case_search(case_number: str, query: str, limit: int = 10):
    """Run a semantic search restricted to one case's documents.

    Returns the unwrapped tool payload, or ``{"message": <raw text>}`` when
    the tool output is not valid JSON.
    """
    raw = await search_tools.search_case_documents(case_number, query, limit)
    try:
        # GAP-48: unwrap the tool envelope, keep the legacy API shape.
        response = envelope_unwrap(json.loads(raw))
    except json.JSONDecodeError:
        response = {"message": raw}
    return response
@app.get("/api/search/cases")
async def api_search_cases(q: str, limit: int = 10):
    """Lightweight SQL search over cases — by case number, address, parties, title.

    Powers the global-search dropdown in the header. Returns small projections,
    not full case objects.

    Args:
        q: Free-text needle; matched case-insensitively as a substring.
            Queries shorter than two characters (after stripping) return an
            empty result.
        limit: Maximum number of rows; values <= 0 return an empty result
            instead of reaching the database (a negative LIMIT is a SQL error).

    Returns:
        ``{"items": [...], "count": N}`` where each item carries case_number,
        title, property_address, status, practice_area and appeal_subtype.
    """
    q = q.strip()
    if len(q) < 2 or limit <= 0:
        return {"items": [], "count": 0}

    # The needle is user text, not a pattern: escape LIKE metacharacters so
    # that typing "%" or "_" searches for those characters literally instead
    # of matching everything. Backslash is PostgreSQL's default LIKE escape.
    escaped = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    needle = f"%{escaped}%"
    prefix = f"{escaped}%"

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT case_number, title, property_address, status,
                   practice_area, appeal_subtype, updated_at
            FROM cases
            WHERE case_number ILIKE $1
               OR property_address ILIKE $1
               OR title ILIKE $1
               OR subject ILIKE $1
               OR appellants::text ILIKE $1
               OR respondents::text ILIKE $1
            ORDER BY
                CASE WHEN case_number ILIKE $2 THEN 0 ELSE 1 END,
                updated_at DESC NULLS LAST
            LIMIT $3
            """,
            needle, prefix, limit,
        )

    projected = (
        "case_number", "title", "property_address",
        "status", "practice_area", "appeal_subtype",
    )
    items = [{key: r[key] for key in projected} for r in rows]
    return {"items": items, "count": len(items)}
@app.get("/api/cases/{case_number}/template")
async def api_case_template(case_number: str):
    """Return the outcome-aware decision template for a case.

    Raises:
        HTTPException: 404 when the tool envelope reports ``status == "error"``.
    """
    raw = await drafting_tools.get_decision_template(case_number)
    envelope = json.loads(raw)  # GAP-48
    if envelope.get("status") != "error":
        return {"template": envelope_unwrap(envelope)}
    raise HTTPException(404, envelope.get("message") or "")
@app.get("/api/processing-status")
async def api_processing_status():
    """Return the overall processing status reported by the workflow tools."""
    raw = await workflow_tools.processing_status()
    envelope = json.loads(raw)
    return envelope_unwrap(envelope)  # GAP-48
@app.get("/api/system/diagnostics")
async def system_diagnostics():
    """System health snapshot: DB counts, recent failures, task queue.

    Returns a dict with:
        db_ok: whether a trivial ``SELECT 1`` succeeded.
        tables: row count per monitored table (``None`` if the count failed).
        halacha_backlog: whatever ``metrics_service.halacha_backlog`` reports.
        failed_documents / stuck_documents: up to 20 documents each, newest first.
        active_tasks: entries currently reported by ``_progress.active()``.
    """

    def _doc_summary(r) -> dict:
        # Shared projection for the failed/stuck document lists.
        created = r["created_at"]
        return {
            "id": str(r["id"]),
            "title": r["title"] or "",
            "status": r["extraction_status"],
            "case_number": r["case_number"] or "",
            "created_at": created.isoformat() if created else None,
        }

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        db_ok = False
        try:
            await conn.fetchval("SELECT 1")
            db_ok = True
        except Exception:  # noqa: BLE001 — diagnostics must report, not raise
            logger.warning("diagnostics: DB ping failed", exc_info=True)

        tables = {}
        # Table names come from this fixed tuple, never from user input, so
        # interpolating them into the SQL text is safe.
        for t in ("cases", "documents", "document_chunks", "style_corpus", "style_patterns"):
            try:
                tables[t] = await conn.fetchval(f"SELECT count(*) FROM {t}")
            except Exception:  # noqa: BLE001 — a missing table is itself a finding
                logger.warning("diagnostics: count(*) failed for %s", t, exc_info=True)
                tables[t] = None

        # Documents that failed extraction or are stuck
        failed_docs = await conn.fetch(
            "SELECT d.id, d.title, d.extraction_status, d.created_at, "
            "       c.case_number "
            "FROM documents d LEFT JOIN cases c ON d.case_id = c.id "
            "WHERE d.extraction_status IN ('failed', 'error') "
            "ORDER BY d.created_at DESC LIMIT 20"
        )
        stuck_docs = await conn.fetch(
            "SELECT d.id, d.title, d.extraction_status, d.created_at, "
            "       c.case_number "
            "FROM documents d LEFT JOIN cases c ON d.case_id = c.id "
            "WHERE d.extraction_status IN ('pending', 'processing') "
            "  AND d.created_at < now() - interval '10 minutes' "
            "ORDER BY d.created_at DESC LIMIT 20"
        )
        # Halacha review backlog (GAP-14 / INV-QA1 / G10) — human gate visibility
        halacha_backlog = await metrics_service.halacha_backlog(conn)

    active_tasks = [
        {"task_id": tid, "filename": d.get("filename", ""),
         "status": d.get("status", ""), "step": d.get("step", "")}
        for tid, d in await _progress.active()
    ]
    return {
        "db_ok": db_ok,
        "tables": tables,
        "halacha_backlog": halacha_backlog,
        "failed_documents": [_doc_summary(r) for r in failed_docs],
        "stuck_documents": [_doc_summary(r) for r in stuck_docs],
        "active_tasks": active_tasks,
    }
@app.get("/api/system/recent-activity")
async def system_recent_activity(limit: int = 8):
    """Build a feed of recent events from cases + style_corpus + style_patterns.

    Each event has: type, label, detail, timestamp, target. Up to ``limit``
    rows are read from each source; the merged feed is sorted newest-first
    and cut to ``limit`` entries.
    """

    def _iso(ts):
        return ts.isoformat() if ts else None

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        # Recent cases
        case_rows = await conn.fetch(
            "SELECT case_number, title, created_at FROM cases "
            "ORDER BY created_at DESC LIMIT $1", limit
        )
        # Recent corpus additions
        corpus_rows = await conn.fetch(
            "SELECT decision_number, created_at FROM style_corpus "
            "ORDER BY created_at DESC LIMIT $1", limit
        )
        # Last style analysis run (if any)
        newest_pattern = await conn.fetchrow(
            "SELECT created_at FROM style_patterns "
            "ORDER BY created_at DESC LIMIT 1"
        )
        has_analysis = bool(newest_pattern and newest_pattern["created_at"])
        pattern_total = None
        if has_analysis:
            pattern_total = await conn.fetchval("SELECT count(*) FROM style_patterns")

    events: list[dict] = [
        {
            "type": "case_created",
            "label": f"תיק חדש: ערר {row['case_number']}",
            "detail": row["title"] or "",
            "timestamp": _iso(row["created_at"]),
            "target": f"/#/case/{row['case_number']}",
        }
        for row in case_rows
    ]
    events.extend(
        {
            "type": "corpus_added",
            "label": f"החלטה נוספה לקורפוס: {row['decision_number'] or 'ללא מספר'}",
            "detail": "",
            "timestamp": _iso(row["created_at"]),
            "target": "/#/training",
        }
        for row in corpus_rows
    )
    if has_analysis:
        events.append({
            "type": "analysis_run",
            "label": f"ניתוח סגנון — {pattern_total} דפוסים חולצו",
            "detail": "",
            "timestamp": _iso(newest_pattern["created_at"]),
            "target": "/#/style-report",
        })

    # Newest first; events without a timestamp sort last.
    events.sort(key=lambda ev: ev["timestamp"] or "", reverse=True)
    return {"events": events[:limit]}
# ── Workflow API — outcome, direction, claims, QA, learning ──────
class OutcomeRequest(BaseModel):
    """Request body for POST /api/cases/{case_number}/outcome."""

    outcome: str  # rejection / full_acceptance / partial_acceptance
    # Optional free-text reasoning; the outcome handler picks the case status
    # based on whether this is non-empty.
    reasoning: str = ""
class DirectionRequest(BaseModel):
    """Request body for POST /api/cases/{case_number}/direction."""

    direction_doc: dict  # JSON document with main_reasoning, reasoning_order, key_precedents, notes
@app.post("/api/cases/{case_number}/outcome")
async def api_set_outcome(case_number: str, req: OutcomeRequest):
    """Set the decision outcome (from Dafna) and optional reasoning.

    Updates the case's decision row (or inserts a version-1 draft when none
    exists) and then moves the case to ``direction_approved`` when reasoning
    was supplied, otherwise ``outcome_set``. Both writes run in a single
    transaction so a failure cannot leave the decision and the case status
    out of sync.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])
    new_status = "direction_approved" if req.reasoning else "outcome_set"

    pool = await db.get_pool()
    async with pool.acquire() as conn, conn.transaction():
        # Update or create decision record
        existing = await conn.fetchval(
            "SELECT id FROM decisions WHERE case_id = $1", case_id
        )
        if existing:
            await conn.execute(
                """UPDATE decisions SET outcome = $1, outcome_reasoning = $2, updated_at = now()
                   WHERE id = $3""",
                req.outcome, req.reasoning, existing,
            )
        else:
            await conn.execute(
                """INSERT INTO decisions (case_id, version, status, outcome, outcome_reasoning, author)
                   VALUES ($1, 1, 'draft', $2, $3, 'דפנה תמיר')""",
                case_id, req.outcome, req.reasoning,
            )
        # Update case status
        await conn.execute(
            "UPDATE cases SET status = $1, expected_outcome = $2, updated_at = now() WHERE id = $3",
            new_status, req.outcome, case_id,
        )
    return {"status": new_status, "outcome": req.outcome, "has_reasoning": bool(req.reasoning)}
@app.get("/api/cases/{case_number}/claims")
async def api_get_claims(case_number: str):
    """Return the extracted claims of a case, grouped by party role.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT party_role, claim_text, claim_index, source_document, addressed_in_paragraph
               FROM claims WHERE case_id = $1 ORDER BY party_role, claim_index""",
            UUID(case["id"]),
        )

    grouped = {}
    for record in rows:
        grouped.setdefault(record["party_role"], []).append(dict(record))
    return {"case_number": case_number, "claims": grouped, "total": len(rows)}
# ── Legal Arguments (aggregated claims) ────────────────────────────
# The aggregator groups raw ``claims`` rows into ~6-12 distinct legal
# arguments per party. The heavy lifting (LLM call) runs in the local
# MCP server context where Claude CLI is available; here we expose
# read + trigger endpoints. The trigger is a BackgroundTask only when
# Claude CLI is actually present in the runtime (i.e. dev box) — inside
# the FastAPI container it short-circuits with status="llm_unavailable".
@app.post("/api/cases/{case_number}/aggregate-arguments")
async def api_aggregate_arguments(
    case_number: str,
    background_tasks: BackgroundTasks,
    force: bool = False,
):
    """Kick off aggregation of raw claims into distinct legal arguments via Claude.

    The work is scheduled as a BackgroundTask because the LLM pass can take
    30-90 seconds; the response only confirms that it was started. Failures
    in the background job are logged, not surfaced to the caller.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    async def _aggregate_in_background() -> None:
        try:
            from legal_mcp.services import argument_aggregator

            outcome = await argument_aggregator.aggregate_claims_to_arguments(
                UUID(case["id"]), force=force,
            )
        except Exception as e:  # noqa: BLE001
            logger.exception(
                "aggregate_arguments[%s] failed: %s", case_number, e,
            )
        else:
            logger.info(
                "aggregate_arguments[%s] finished: %s",
                case_number, outcome,
            )

    background_tasks.add_task(_aggregate_in_background)
    return {
        "status": "started",
        "case_number": case_number,
        "force": force,
        "message": "Aggregation started in background. Poll /legal-arguments for results.",
    }
@app.get("/api/cases/{case_number}/legal-arguments")
async def api_get_legal_arguments(case_number: str, party: str = ""):
    """Return the aggregated legal arguments of a case, flat and grouped by party.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    from legal_mcp.services import argument_aggregator

    arguments = await argument_aggregator.get_legal_arguments(
        UUID(case["id"]), party=party,
    )

    # Group by party for the UI.
    by_party: dict[str, list[dict]] = {}
    for argument in arguments:
        owner = argument["party"]
        if owner not in by_party:
            by_party[owner] = []
        by_party[owner].append(argument)

    return {
        "case_number": case_number,
        "total": len(arguments),
        "by_party": by_party,
        "arguments": arguments,
    }
@app.post("/api/cases/{case_number}/direction")
async def api_set_direction(case_number: str, req: DirectionRequest):
    """Save the approved direction document for the discussion block.

    Stores ``req.direction_doc`` (as JSON) on the case's decision row(s) and
    marks the case ``direction_approved``. Both writes share one transaction.

    Raises:
        HTTPException: 404 when the case does not exist, or when the case has
            no decision row to attach the document to. Previously the latter
            silently updated zero rows and still marked the case approved,
            losing the submitted document.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])
    direction_json = json.dumps(req.direction_doc, ensure_ascii=False)

    pool = await db.get_pool()
    async with pool.acquire() as conn, conn.transaction():
        # execute() returns the command status tag, e.g. "UPDATE 1".
        tag = await conn.execute(
            """UPDATE decisions SET direction_doc = $1, updated_at = now()
               WHERE case_id = $2""",
            direction_json,
            case_id,
        )
        if tag == "UPDATE 0":
            # Nothing was stored; do not advance the case status.
            raise HTTPException(404, "אין החלטה לתיק זה")
        await conn.execute(
            "UPDATE cases SET status = 'direction_approved', updated_at = now() WHERE id = $1",
            case_id,
        )
    return {"status": "direction_approved", "direction_doc": req.direction_doc}
@app.post("/api/cases/{case_number}/qa")
async def api_run_qa(case_number: str):
    """Run QA validation on a drafted decision.

    Computes four checks (claims coverage, discussion-block weight, neutral
    background wording, sequential numbering), replaces the decision's stored
    ``qa_results`` with them, and sets the case status to ``drafted`` when
    every *critical* check passed, otherwise ``qa_review``. The delete, the
    inserts and the status update run in one transaction, so a failure midway
    cannot leave the case without QA results.

    Returns:
        ``{"passed": bool, "checks": [...], "status": new_status}``; each
        check's ``errors`` field is a JSON-encoded string.

    Raises:
        HTTPException: 404 when the case or its decision does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        decision = await conn.fetchrow(
            "SELECT id FROM decisions WHERE case_id = $1", case_id
        )
        if not decision:
            raise HTTPException(404, "אין החלטה לתיק זה")
        decision_id = decision["id"]

        blocks = await conn.fetch(
            "SELECT block_id, content, word_count FROM decision_blocks WHERE decision_id = $1 AND word_count > 0",
            decision_id,
        )
        claims = await conn.fetch(
            "SELECT id, claim_text, addressed_in_paragraph FROM claims WHERE case_id = $1",
            case_id,
        )
        by_block_id = {b["block_id"]: b for b in blocks}
        checks = []

        # Check 1: claims coverage
        unanswered = [c for c in claims if c["addressed_in_paragraph"] is None]
        checks.append({
            "check_name": "claims_coverage",
            "passed": not unanswered,
            "severity": "critical",
            # claim_text may be NULL; treat it as empty rather than crashing.
            "errors": json.dumps(
                [{"claim": (c["claim_text"] or "")[:80]} for c in unanswered],
                ensure_ascii=False,
            ),
            "details": f"{len(claims) - len(unanswered)}/{len(claims)} טענות נענו",
        })

        # Check 2: block weights
        total_words = sum(b["word_count"] for b in blocks)
        yod = by_block_id.get("block-yod")
        yod_pct = (yod["word_count"] / total_words * 100) if yod and total_words > 0 else 0
        checks.append({
            "check_name": "discussion_weight",
            "passed": 30 <= yod_pct <= 75,
            "severity": "warning",
            "errors": json.dumps([]),
            "details": f"בלוק דיון: {yod_pct:.1f}% (טווח: 30-75%)",
        })

        # Check 3: neutral background
        vav = by_block_id.get("block-vav")
        bad_words = ["חריג", "חטא", "בעייתי", "מזעזע", "שערורייתי", "מגוחך", "נפשע", "פגום"]
        background = (vav["content"] if vav else "") or ""
        found_bad = [word for word in bad_words if word in background]
        checks.append({
            "check_name": "neutral_background",
            "passed": not found_bad,
            "severity": "critical",
            "errors": json.dumps(found_bad, ensure_ascii=False),
            "details": f"נמצאו מילות שיפוט: {found_bad}" if found_bad else "תקין",
        })

        # Check 4: sequential numbering
        # NOTE(review): placeholder — always passes; no numbering is inspected.
        checks.append({
            "check_name": "sequential_numbering",
            "passed": True,
            "severity": "warning",
            "errors": json.dumps([]),
            "details": "בדיקה בסיסית עברה",
        })

        # Only critical checks gate the status; warnings are informational.
        all_passed = all(c["passed"] for c in checks if c["severity"] == "critical")
        new_status = "drafted" if all_passed else "qa_review"

        async with conn.transaction():
            # Replace previous QA results
            await conn.execute("DELETE FROM qa_results WHERE decision_id = $1", decision_id)
            for check in checks:
                await conn.execute(
                    """INSERT INTO qa_results (decision_id, case_id, check_name, passed, severity, errors, details)
                       VALUES ($1, $2, $3, $4, $5, $6, $7)""",
                    decision_id, case_id, check["check_name"], check["passed"],
                    check["severity"], check["errors"], check["details"],
                )
            # Update status
            await conn.execute(
                "UPDATE cases SET status = $1, updated_at = now() WHERE id = $2",
                new_status, case_id,
            )
    return {"passed": all_passed, "checks": checks, "status": new_status}
# ── Decision blocks — interactive view + inline edit ──
class BlockUpdateRequest(BaseModel):
    """Request body for the inline decision-block edit endpoint (PUT)."""

    # New text for the block, passed to block_writer.save_block_content.
    content: str
def _serialize_block(row: dict, cfg: dict) -> dict:
    """Combine a decision_blocks DB row with its BLOCK_CONFIG skeleton.

    ``row`` may be falsy (``None`` or empty) for a block that has no DB row
    yet; the result then describes an empty block built purely from ``cfg``.
    ``cfg`` must carry ``block_id``, ``index`` and ``title`` (plus
    ``gen_type`` and ``model`` when ``row`` is falsy).
    """
    if not row:
        return {
            "block_id": cfg["block_id"],
            "block_index": cfg["index"],
            "title": cfg["title"],
            "content": "",
            "word_count": 0,
            "status": "empty",
            "generation_type": cfg["gen_type"],
            "model_used": cfg["model"],
            "updated_at": None,
        }

    stamp = row.get("updated_at")
    return {
        "block_id": cfg["block_id"],
        "block_index": cfg["index"],
        # Prefer the DB title (may be hand-edited); fall back to the canonical config title.
        "title": row.get("title") or cfg["title"],
        "content": row.get("content") or "",
        "word_count": row.get("word_count") or 0,
        "status": row.get("status") or "empty",
        # With a row present these are passed through as stored, even if None.
        "generation_type": row.get("generation_type"),
        "model_used": row.get("model_used"),
        "updated_at": stamp.isoformat() if stamp else None,
    }
@app.get("/api/cases/{case_number}/decision-blocks")
async def api_get_decision_blocks(case_number: str):
    """Return every BLOCK_CONFIG decision block as JSON (empty blocks included).

    Read path for the interactive block viewer — content lives in
    decision_blocks but was previously only reachable via DOCX export.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    from legal_mcp.services.block_writer import BLOCK_CONFIG

    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])

    # Canonical skeleton, ordered by block_index. Carries block_id into each cfg.
    skeleton = []
    for bid, cfg in sorted(BLOCK_CONFIG.items(), key=lambda kv: kv[1]["index"]):
        skeleton.append({**cfg, "block_id": bid})

    decision = await db.get_decision_by_case(case_id)
    active_draft_path = await db.get_active_draft_path(case_id)

    stored: dict[str, dict] = {}
    decision_id = decision["id"] if decision else None
    if decision:
        pool = await db.get_pool()
        async with pool.acquire() as conn:
            records = await conn.fetch(
                """SELECT block_id, block_index, title, content, word_count, status,
                          generation_type, model_used, updated_at
                   FROM decision_blocks WHERE decision_id = $1""",
                UUID(decision_id),
            )
        for record in records:
            stored[record["block_id"]] = dict(record)

    return {
        "case_number": case["case_number"],
        "has_decision": decision is not None,
        "decision_id": decision_id,
        "active_draft_path": active_draft_path,
        "source_of_truth": "docx" if active_draft_path else "blocks",
        "blocks": [
            _serialize_block(stored.get(cfg["block_id"]), cfg) for cfg in skeleton
        ],
    }
@app.put("/api/cases/{case_number}/decision-blocks/{block_id}")
async def api_update_decision_block(
    case_number: str, block_id: str, req: BlockUpdateRequest
):
    """Persist inline-edited content for one decision block.

    Writes to decision_blocks (upsert, status='draft') and rebuilds the
    on-disk decision.md. Creates a decision row if none exists yet. When the
    case has an active draft DOCX the edit still goes through, but a warning
    is logged and flagged in the response.

    Raises:
        HTTPException: 404 for an unknown block id or case; 400 when the
            block writer rejects the content with ``ValueError``.
    """
    from legal_mcp.services import block_writer
    from legal_mcp.services.block_writer import BLOCK_CONFIG

    if block_id not in BLOCK_CONFIG:
        raise HTTPException(404, f"בלוק לא ידוע: {block_id}")

    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])

    draft_path = await db.get_active_draft_path(case_id)
    if draft_path:
        logger.warning(
            "Inline block edit on %s/%s while active_draft_path is set (%s) — "
            "DB and DOCX may diverge.",
            case_number, block_id, draft_path,
        )

    try:
        saved = await block_writer.save_block_content(case_id, block_id, req.content)
    except ValueError as e:
        raise HTTPException(400, str(e))

    row = dict(saved)
    row["status"] = "draft"
    row["updated_at"] = datetime.now(timezone.utc)
    block_cfg = dict(BLOCK_CONFIG[block_id])
    block_cfg["block_id"] = block_id
    return {
        "block": _serialize_block(row, block_cfg),
        "active_draft_warning": bool(draft_path),
    }
@app.post("/api/cases/{case_number}/learn")
async def api_learn(case_number: str):
    """Trigger the learning loop — compare draft to final version.

    For now this only marks the case as ``final``.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    mark_final = "UPDATE cases SET status = 'final', updated_at = now() WHERE id = $1"
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        await conn.execute(mark_final, UUID(case["id"]))
    return {"status": "final", "message": "לולאת למידה הופעלה — גרסה סופית נקלטה"}
# ── Local files API — research, drafts, proofread ──
@app.get("/api/cases/{case_number}/local-files")
async def api_local_files(case_number: str):
    """List local files from case subdirectories (research, drafts, proofread).

    Returns a dict keyed by folder name; a folder appears only when it exists
    and contains at least one non-hidden regular file. Each entry carries
    filename, size, modified_at (mtime) and folder.
    """

    def _visible_files(directory, folder: str) -> list[dict]:
        # is_dir() rather than exists(): a stray regular file at this path
        # would make iterdir() raise.
        if not directory.is_dir():
            return []
        listed = []
        for f in sorted(directory.iterdir()):
            if f.is_file() and not f.name.startswith("."):
                stat = f.stat()
                listed.append({
                    "filename": f.name,
                    "size": stat.st_size,
                    "modified_at": stat.st_mtime,
                    "folder": folder,
                })
        return listed

    case_dir = config.find_case_dir(case_number)
    locations = {
        "research": case_dir / "documents" / "research",
        "proofread": case_dir / "documents" / "proofread",
        # Drafts are at case level, not under documents
        "drafts": case_dir / "drafts",
    }
    result = {}
    for folder, directory in locations.items():
        files = _visible_files(directory, folder)
        if files:
            result[folder] = files
    return result
@app.get("/api/cases/{case_number}/local-files/{folder}/{filename}")
async def api_read_local_file(case_number: str, folder: str, filename: str):
    """Read contents of a local case file.

    ``folder`` must be one of research / proofread / drafts. The file is only
    served when it is a regular file located directly inside that folder —
    the same containment check the exports endpoints apply — so a crafted
    ``filename`` cannot reach outside it.

    Raises:
        HTTPException: 400 for an unknown folder; 404 when the file is
            missing, not a regular file, or not inside the folder.
    """
    if folder not in ("research", "proofread", "drafts"):
        raise HTTPException(400, "Invalid folder")
    case_dir = config.find_case_dir(case_number)
    # Drafts live at case level; the other folders live under documents/.
    base = case_dir / "drafts" if folder == "drafts" else case_dir / "documents" / folder
    path = base / filename
    if not path.is_file() or not path.parent.samefile(base):
        raise HTTPException(404, "קובץ לא נמצא")
    return FileResponse(path, media_type="text/plain; charset=utf-8", filename=filename)
# ── Research analysis (analysis-and-research.md) — parse + edit ────
def _research_file_path(case_number: str) -> Path:
    """Return the location of a case's analysis-and-research.md on disk."""
    research_dir = config.find_case_dir(case_number) / "documents" / "research"
    return research_dir / "analysis-and-research.md"
@app.get("/api/cases/{case_number}/research/analysis")
async def api_research_analysis(case_number: str):
    """Return the parsed structure of analysis-and-research.md for UI rendering.

    Raises:
        HTTPException: 404 when the file does not exist; 500 when parsing fails.
    """
    md_path = _research_file_path(case_number)
    if not md_path.exists():
        raise HTTPException(404, "טרם בוצע ניתוח משפטי לתיק זה")
    try:
        structure = research_md.parse(md_path)
    except Exception as exc:
        logger.exception("Failed to parse %s", md_path)
        raise HTTPException(500, f"שגיאה בעיבוד הקובץ: {exc}")
    return structure
@app.get("/api/cases/{case_number}/research/analysis/download")
async def api_research_analysis_download(case_number: str):
    """Serve the raw analysis-and-research.md file as a download.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    md_path = _research_file_path(case_number)
    if md_path.exists():
        return FileResponse(
            md_path,
            media_type="text/markdown; charset=utf-8",
            filename=f"analysis-{case_number}.md",
        )
    raise HTTPException(404, "טרם בוצע ניתוח משפטי לתיק זה")
@app.get("/api/cases/{case_number}/research/analysis/export-docx")
async def api_research_analysis_export_docx(case_number: str):
    """Export the legal analysis as a DOCX using Dafna's decision template styles.

    After a successful build, the case directory (if it exists) is committed
    and pushed before the file is returned.

    Raises:
        HTTPException: 404 when the exporter raises ``FileNotFoundError``;
            500 for any other export failure.
    """
    from legal_mcp.services.analysis_docx_exporter import build_analysis_docx

    try:
        docx_path = await build_analysis_docx(case_number)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    except Exception as exc:
        logger.exception("Failed to export analysis DOCX for %s", case_number)
        raise HTTPException(500, f"שגיאה בייצוא: {exc}")

    repo_dir = config.find_case_dir(case_number)
    if repo_dir.exists():
        commit_and_push(repo_dir, f"ניתוח משפטי: {docx_path.name}")

    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    return FileResponse(docx_path, media_type=docx_mime, filename=docx_path.name)
@app.put("/api/cases/{case_number}/research/analysis/upload")
async def api_research_analysis_upload(
    case_number: str,
    file: UploadFile = File(...),
):
    """Upload an updated analysis-and-research.md file.

    Validates that:
      1. The file is markdown (``.md``, any letter case), at most 5MB, UTF-8,
         and at least 100 non-blank characters long
      2. It can be parsed by the research_md parser
      3. It contains at least one structural section (issues or threshold_claims)
      4. The case number in the file (when present) matches the URL

    On success, backs up the existing file and replaces it. The upload is
    staged in a temp file next to the destination; that temp file is removed
    on every failure path, including a failed backup.

    Raises:
        HTTPException: 400 for any validation failure above.
    """
    if not file.filename or not file.filename.lower().endswith(".md"):
        raise HTTPException(400, "הקובץ חייב להיות בפורמט Markdown (.md)")
    content = await file.read()
    if len(content) > 5 * 1024 * 1024:
        raise HTTPException(400, "הקובץ גדול מדי — מקסימום 5MB")
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError:
        raise HTTPException(400, "הקובץ חייב להיות בקידוד UTF-8")
    if len(text.strip()) < 100:
        raise HTTPException(400, "הקובץ ריק מדי — נראה שחסר תוכן")

    # Write to a temp file so parse() can work on it
    dest = _research_file_path(case_number)
    tmp = dest.with_suffix(".md.upload-tmp")
    try:
        try:
            dest.parent.mkdir(parents=True, exist_ok=True)
            tmp.write_text(text, encoding="utf-8")
            parsed = research_md.parse(tmp)
        except Exception as e:
            raise HTTPException(
                400,
                f"שגיאה בפרסור הקובץ — המבנה לא תקין: {e}",
            ) from e

        # Validate structure
        issues = parsed.get("issues", [])
        thresholds = parsed.get("threshold_claims", [])
        if not issues and not thresholds:
            raise HTTPException(
                400,
                "הקובץ חייב להכיל לפחות סעיף אחד של טענות סף או סוגיות להכרעה",
            )

        # Validate case number matches
        file_case = (parsed.get("header") or {}).get("case_number", "")
        if file_case and file_case != case_number:
            raise HTTPException(
                400,
                f"מספר התיק בקובץ ({file_case}) לא תואם לתיק הנוכחי ({case_number})",
            )

        # Derive everything the response needs BEFORE touching the live file,
        # so an unexpected parser shape cannot fail after the replace.
        conclusions = parsed.get("conclusions") or ""
        has_conclusions = bool(
            conclusions.strip() if isinstance(conclusions, str) else conclusions
        )

        # Backup existing file
        if dest.exists():
            backup_dir = dest.parent / "backup"
            backup_dir.mkdir(exist_ok=True)
            ts = time.strftime("%Y%m%d-%H%M%S")
            backup_path = backup_dir / f"analysis-and-research-{ts}.md"
            shutil.copy2(dest, backup_path)

        # Replace with uploaded file
        tmp.replace(dest)
    finally:
        # No-op after a successful replace; cleans up on every failure path.
        tmp.unlink(missing_ok=True)

    return {
        "status": "ok",
        "sections": {
            "threshold_claims": len(thresholds),
            "issues": len(issues),
            "has_conclusions": has_conclusions,
        },
        "file_size": len(content),
    }
class ChairPositionRequest(BaseModel):
    """Request body for the chair-position PATCH endpoint."""

    # Target subsection; the handler only accepts threshold_<n> / issue_<n>.
    section_id: str
    # New chair position text; defaults to an empty string.
    position: str = ""
@app.patch("/api/cases/{case_number}/research/analysis/chair-position")
async def api_research_chair_position(case_number: str, req: ChairPositionRequest):
    """Update the chair_position field of a specific subsection, writing
    directly to analysis-and-research.md (atomic rename).

    Raises:
        HTTPException: 404 when the file is missing or the updater raises
            ``ValueError``; 400 when ``section_id`` is not ``threshold_<n>``
            or ``issue_<n>``; 500 for any other failure while saving.
    """
    path = _research_file_path(case_number)
    if not path.exists():
        raise HTTPException(404, "הקובץ לא נמצא")
    # fullmatch, not match(...$): "$" also matches just before a trailing
    # newline, which would let "issue_1\n" through validation.
    if not re.fullmatch(r"(threshold|issue)_\d+", req.section_id):
        raise HTTPException(400, "section_id לא תקין")
    try:
        return research_md.update_chair_position(path, req.section_id, req.position)
    except ValueError as e:
        raise HTTPException(404, str(e))
    except Exception as e:
        logger.exception("Failed to update chair position")
        raise HTTPException(500, f"שגיאה בשמירה: {e}")
# ── Precedents API — attached case-law quotes for the compose phase ──
class PrecedentCreateRequest(BaseModel):
    """Request body for attaching a precedent (quote + citation) to a case."""

    # quote and citation must both be non-blank; the attach handler rejects
    # whitespace-only values with a 400.
    quote: str
    citation: str
    section_id: str = ""  # empty = case-level / general discussion
    chair_note: str = ""
    pdf_document_id: str = ""  # UUID string, empty = no PDF
@app.post("/api/cases/{case_number}/precedents")
async def api_precedent_attach(case_number: str, req: PrecedentCreateRequest):
    """Attach a legal precedent (quote + citation) to a case, optionally
    scoped to a specific threshold_claim / issue section. Cross-case
    library reuse happens at the search endpoint — this one always
    inserts a new row.

    Raises:
        HTTPException: 400 when ``section_id`` is non-empty but not
            ``threshold_<n>`` / ``issue_<n>``, or when quote/citation is
            blank; 404 when the tool envelope reports an error.
    """
    # fullmatch, not match(...$): "$" also matches just before a trailing
    # newline, which would let "issue_1\n" through validation.
    if req.section_id and not re.fullmatch(r"(threshold|issue)_\d+", req.section_id):
        raise HTTPException(400, "section_id לא תקין")
    if not req.quote.strip() or not req.citation.strip():
        raise HTTPException(400, "quote ו-citation חובה")
    result = await precedents_tools.precedent_attach(
        case_number=case_number,
        quote=req.quote,
        citation=req.citation,
        section_id=req.section_id,
        chair_note=req.chair_note,
        pdf_document_id=req.pdf_document_id,
    )
    parsed = json.loads(result)  # GAP-48
    if parsed.get("status") == "error":
        raise HTTPException(404, parsed.get("message") or "")
    return envelope_unwrap(parsed)
@app.post("/api/cases/{case_number}/precedents/upload-pdf")
async def api_precedent_upload_pdf(
    case_number: str,
    file: UploadFile = File(...),
):
    """One-shot file upload for a precedent attachment (.pdf, .docx or .doc).

    Stores the file on disk under the case's documents/precedents directory
    and creates a `documents` row with doc_type='precedent_archive'. Returns
    {document_id, filename} so the frontend can pass the id into
    POST /precedents. No SSE / background processing — archive only, no text
    extraction.

    Raises:
        HTTPException: 404 when the case does not exist; 400 for a missing
            filename, unsupported extension, or oversized file.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    uploaded = Path(file.filename)
    ext = uploaded.suffix.lower()
    if ext not in {".pdf", ".docx", ".doc"}:
        raise HTTPException(400, f"סוג קובץ לא נתמך לפסיקה: {ext}")

    payload = await file.read()
    if len(payload) > MAX_FILE_SIZE:
        raise HTTPException(400, f"קובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    # Save under a dedicated precedents/ subdirectory so they don't mix
    # with extracted originals.
    target_dir = config.find_case_dir(case_number) / "documents" / "precedents"
    target_dir.mkdir(parents=True, exist_ok=True)

    cleaned = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", uploaded.stem).strip()
    base = cleaned or "precedent"

    # First free name: base.ext, then base-1.ext, base-2.ext, ...
    dest = target_dir / f"{base}{ext}"
    suffix_no = 1
    while dest.exists():
        dest = target_dir / f"{base}-{suffix_no}{ext}"
        suffix_no += 1
    dest.write_bytes(payload)

    doc = await db.create_document(
        case_id=UUID(case["id"]),
        doc_type="precedent_archive",
        title=base,
        file_path=str(dest),
    )
    return {"document_id": doc["id"], "filename": dest.name}
@app.get("/api/cases/{case_number}/precedents")
async def api_precedent_list(case_number: str):
    """List every precedent attached to a case; the client groups by section_id.

    Raises:
        HTTPException: 404 when the tool envelope reports ``status == "error"``.
    """
    raw = await precedents_tools.precedent_list(case_number)
    envelope = json.loads(raw)  # GAP-48
    is_error = isinstance(envelope, dict) and envelope.get("status") == "error"
    if is_error:
        raise HTTPException(404, envelope.get("message") or "")
    return envelope_unwrap(envelope)
@app.delete("/api/precedents/{precedent_id}")
async def api_precedent_delete(precedent_id: str):
    """Delete a precedent attachment.

    The archived PDF (if any) stays in the documents table — orphaned
    references nullify via FK ON DELETE SET NULL — so we keep the audit
    trail of the file.

    Raises:
        HTTPException: 400 when the tool envelope reports an error; 404 when
            the unwrapped result has no truthy ``deleted`` flag.
    """
    raw = await precedents_tools.precedent_remove(precedent_id)
    envelope = json.loads(raw)  # GAP-48
    if envelope.get("status") == "error":
        raise HTTPException(400, envelope.get("message") or "")
    outcome = envelope_unwrap(envelope)
    if outcome.get("deleted"):
        return outcome
    raise HTTPException(404, "לא נמצא")
@app.get("/api/precedents/search")
async def api_precedent_search(q: str, practice_area: str = "", limit: int = 10):
    """Cross-case library typeahead. Returns one row per distinct citation.

    Always answers with an array (empty when the envelope carries no data).

    Raises:
        HTTPException: 400 when the tool envelope reports ``status == "error"``.
    """
    raw = await precedents_tools.search_case_precedents(q, practice_area, limit)  # GAP-49 rename
    envelope = json.loads(raw)  # GAP-48: typeahead expects an array
    if isinstance(envelope, dict) and envelope.get("status") == "error":
        raise HTTPException(400, envelope.get("message") or "")
    hits = envelope.get("data")
    return hits if hits else []
# ── Exports API — drafts, versions, download, upload, mark-final ──
@app.get("/api/cases/{case_number}/exports")
async def api_list_exports(case_number: str):
    """List the exported DOCX drafts and versions of a case, newest first.

    Returns an empty list when the exports directory does not exist. A file
    counts as final when its name starts with the "סופי-" prefix.
    """
    export_dir = config.find_case_dir(case_number) / "exports"
    if not export_dir.exists():
        return []

    newest_first = sorted(
        export_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True
    )
    listing = []
    for entry in newest_first:
        if not entry.is_file() or entry.suffix.lower() != ".docx":
            continue
        info = entry.stat()
        listing.append({
            "filename": entry.name,
            "size": info.st_size,
            "created_at": info.st_mtime,
            "is_final": entry.name.startswith("סופי-"),
        })
    return listing
@app.get("/api/cases/{case_number}/exports/{filename}/download")
async def api_download_export(case_number: str, filename: str):
    """Download an exported file.

    The file must be a regular file located directly inside the case's
    exports directory.

    Raises:
        HTTPException: 404 when the path is missing, is not a regular file
            (e.g. a sub-directory, which FileResponse cannot serve), or is
            not directly inside the exports directory.
    """
    export_dir = config.find_case_dir(case_number) / "exports"
    path = export_dir / filename
    if not path.is_file() or not path.parent.samefile(export_dir):
        raise HTTPException(404, "קובץ לא נמצא")
    return FileResponse(
        path,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        filename=filename,
    )
@app.delete("/api/cases/{case_number}/exports/{filename}")
async def api_delete_export(case_number: str, filename: str):
    """Delete an exported draft file.

    Raises:
        HTTPException: 404 when the path is missing, is not a regular file
            (``unlink()`` would fail on a directory), or is not directly
            inside the case's exports directory.
    """
    export_dir = config.find_case_dir(case_number) / "exports"
    path = export_dir / filename
    if not path.is_file() or not path.parent.samefile(export_dir):
        raise HTTPException(404, "קובץ לא נמצא")
    path.unlink()
    return {"deleted": True, "filename": filename}
@app.post("/api/cases/{case_number}/exports/upload")
async def api_upload_export(case_number: str, file: UploadFile = File(...)):
    """Upload a revised version of a draft.

    The file is stored as the next "עריכה-v<N>.docx" in the case's exports
    directory. After saving, it is automatically registered as the case's
    active_draft (source of truth) and bookmarks are retrofitted so that
    future revise_draft calls can anchor Track Changes to the 12 blocks.
    A failure of that registration step is reported through ``apply_status``
    rather than raised.

    Raises:
        HTTPException: 404 when the case does not exist; 400 for a missing
            filename, a non-DOCX file, or an oversized file.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    if not file.filename:
        raise HTTPException(400, "No filename provided")
    if Path(file.filename).suffix.lower() != ".docx":
        raise HTTPException(400, "רק קבצי DOCX נתמכים")

    payload = await file.read()
    if len(payload) > MAX_FILE_SIZE:
        raise HTTPException(400, f"קובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    export_dir = config.find_case_dir(case_number) / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)

    def _version_of(upload_path) -> int:
        # Unparseable names count as version 0 so they never raise next_ver.
        try:
            return int(upload_path.stem.split("-v")[1])
        except (IndexError, ValueError):
            return 0

    # Version numbering for uploads: one past the highest existing version.
    next_ver = max(
        [1] + [_version_of(p) + 1 for p in export_dir.glob("עריכה-v*.docx")]
    )
    dest = export_dir / f"עריכה-v{next_ver}.docx"
    dest.write_bytes(payload)

    # Auto-register as active_draft + retrofit bookmarks
    auto_result: dict = {"status": "ok"}
    try:
        raw = await drafting_tools.apply_user_edit(case_number, dest.name)
        envelope = json.loads(raw)  # GAP-48
        if envelope.get("status") == "error":
            auto_result = {"status": "error", "message": envelope.get("message", "")}
        else:
            auto_result = {**(envelope_unwrap(envelope) or {}), "status": "completed"}
    except Exception as exc:
        auto_result = {"status": "error", "message": str(exc)}

    response = {
        "filename": dest.name,
        "size": len(payload),
        "version": next_ver,
        "active_draft": auto_result.get("active_draft_path"),
    }
    for list_field in ("bookmarks_added", "missing_blocks", "structural_fallback"):
        response[list_field] = auto_result.get(list_field, [])
    response["apply_status"] = auto_result.get("status", "error")
    return response
# Request body for POST /api/cases/{case_number}/exports/revise.
class ReviseRequest(BaseModel):
    # Batch of revision dicts; api_revise_draft serializes the list to JSON
    # and forwards it to drafting_tools.revise_draft.
    revisions: list[dict]
    # Author label forwarded alongside the revisions.
    author: str = "מערכת AI"
@app.post("/api/cases/{case_number}/exports/revise")
async def api_revise_draft(case_number: str, req: ReviseRequest):
    """Apply a batch of Track Changes revisions to the active draft.

    Forwards the revisions (as JSON) and author to
    ``drafting_tools.revise_draft``. An error envelope becomes HTTP 400;
    otherwise the unwrapped envelope payload is returned.
    """
    revisions_json = json.dumps(req.revisions, ensure_ascii=False)
    envelope = json.loads(  # GAP-48
        await drafting_tools.revise_draft(case_number, revisions_json, req.author)
    )
    if envelope.get("status") == "error":
        raise HTTPException(400, envelope.get("message", "revise failed"))
    return envelope_unwrap(envelope)
@app.get("/api/cases/{case_number}/exports/bookmarks")
async def api_list_bookmarks(case_number: str):
    """List bookmarks in the case's active draft (anchors for revisions).

    Returns the unwrapped payload of ``drafting_tools.list_bookmarks`` when it
    is a dict, and ``{"bookmarks": []}`` for any other payload.
    """
    envelope = json.loads(await drafting_tools.list_bookmarks(case_number))  # GAP-48
    payload = envelope_unwrap(envelope)
    if isinstance(payload, dict):
        return payload
    return {"bookmarks": []}
@app.post("/api/cases/{case_number}/exports/{filename}/retrofit")
async def api_retrofit_bookmarks(case_number: str, filename: str):
    """Manually trigger retrofit of bookmarks on an existing file.

    Delegates to ``drafting_tools.apply_user_edit``; an error envelope becomes
    HTTP 400, otherwise the unwrapped payload is returned.
    """
    envelope = json.loads(  # GAP-48
        await drafting_tools.apply_user_edit(case_number, filename)
    )
    if envelope.get("status") == "error":
        raise HTTPException(400, envelope.get("message", "retrofit failed"))
    return envelope_unwrap(envelope)
@app.get("/api/cases/{case_number}/active-draft")
async def api_get_active_draft(case_number: str):
    """Get the current active_draft_path for a case.

    Responds 404 for an unknown case. Otherwise returns the stored path, its
    file name, and whether the path currently exists on disk; all three are
    ``None``/``False`` when no active draft is recorded.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    stored_path = await db.get_active_draft_path(UUID(case["id"]))
    if not stored_path:
        return {"active_draft_path": None, "filename": None, "exists": False}
    draft = Path(stored_path)
    return {
        "active_draft_path": stored_path,
        "filename": draft.name,
        "exists": draft.exists(),
    }
@app.post("/api/cases/{case_number}/exports/{filename}/mark-final")
async def api_mark_final(case_number: str, filename: str):
    """Mark an export as the final version — copies to training corpus.

    Steps, in order:
      1. Copy the export to ``סופי-<case_number>.docx`` in the exports dir.
      2. Copy it to ``config.TRAINING_DIR`` for future style learning.
      3. Set the case status to ``final``.
      4. Snapshot the current decision blocks and open a draft↔final pair
         (best effort — failure is logged, not raised).
      5. Commit and push the case directory if it exists.
      6. Wake the Knowledge Curator (best effort).

    Raises HTTP 404 when the case or the export file is not found.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    export_dir = config.find_case_dir(case_number) / "exports"
    source = export_dir / filename
    if not source.exists() or not source.parent.samefile(export_dir):
        raise HTTPException(404, "קובץ לא נמצא")
    # Rename/copy to final
    final_name = f"סופי-{case_number}.docx"
    final_path = export_dir / final_name
    # Re-marking the file that already IS the final copy must not fail:
    # shutil.copy2 raises SameFileError when source and destination coincide.
    if not (final_path.exists() and source.samefile(final_path)):
        shutil.copy2(str(source), str(final_path))
    # Also copy to training directory for future style learning
    config.TRAINING_DIR.mkdir(parents=True, exist_ok=True)
    training_dest = config.TRAINING_DIR / f"החלטה-{case_number}.docx"
    shutil.copy2(str(source), str(training_dest))
    # Update case status to final
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        await conn.execute(
            "UPDATE cases SET status = 'final', updated_at = now() WHERE id = $1",
            UUID(case["id"]),
        )
    # T5/INV-LRN4 — reconciliation ledger: snapshot the AI draft NOW (before any
    # later edit can overwrite decision_blocks) and open a draft↔final pair. The
    # LLM distillation (curator) fills final_text/diff_stats/analysis afterwards.
    pair_id: str | None = None
    try:
        decision = await db.get_decision_by_case(UUID(case["id"]))
        draft_text = ""
        if decision:
            async with pool.acquire() as conn:
                brows = await conn.fetch(
                    "SELECT content FROM decision_blocks "
                    "WHERE decision_id = $1 AND word_count > 0 ORDER BY block_index",
                    UUID(decision["id"]),
                )
            draft_text = "\n\n".join(b["content"] for b in brows if b["content"])
        pair_id = await db.create_draft_final_pair(
            UUID(case["id"]), draft_text, str(final_path),
        )
    except Exception as e:
        # Best effort: the ledger entry must never block marking final.
        logger.warning("draft_final_pair snapshot failed for %s: %s", case_number, e)
    case_dir = config.find_case_dir(case_number)
    if case_dir.exists():
        commit_and_push(case_dir, f"גרסה סופית: {final_name}")
    # ingest_final_version into the case_law corpus is NOT called here:
    # it uses claude_session under the hood, which only works when invoked
    # from the local MCP server (Claude CLI present), not from this
    # FastAPI container. Run it manually via Claude Code / MCP when needed.
    # See: ~/.claude/projects/-home-chaim-legal-ai/memory/feedback_claude_session_local_only.md
    ingest_status: dict = {"status": "skipped", "reason": "container_no_claude_cli"}
    # Best-effort: wake the Knowledge Curator (Hermes) to analyze the
    # signed final and propose updates to skills/lessons. Non-fatal on
    # failure so marking final never breaks for the user.
    curator_status: dict = {"status": "skipped"}
    try:
        # Company by case-number prefix: 1xxx=CMP (licensing), 8/9xxx=CMPA (betterment)
        prefix = case_number[:1]
        company_id = (
            PAPERCLIP_COMPANIES["licensing"] if prefix == "1"
            else PAPERCLIP_COMPANIES["betterment"] if prefix in ("8", "9")
            else ""
        )
        curator_status = await pc_wake_curator_for_final(
            case_number, final_name, company_id=company_id
        )
    except Exception as e:
        logger.warning("curator wakeup failed for %s: %s", case_number, e)
        curator_status = {"status": "error", "error": str(e)}
    return {
        "final_filename": final_name,
        "training_copy": str(training_dest),
        "status": "final",
        "ingest_final": ingest_status,
        "curator": curator_status,
    }
@app.post("/api/cases/{case_number}/export-docx")
async def api_export_docx(case_number: str, background_tasks: BackgroundTasks):
    """Trigger DOCX export for a case.

    On a successful export, schedules a fire-and-forget webhook to the
    Paperclip plugin so it can attach a "final-decision" document
    (markdown body + download link) to the linked issue.

    Raises HTTP 409 when the tool reports an error envelope (e.g. a QA gate
    blocked the export); the detail carries the message and, when present,
    ``failed_gates``. Returns the unwrapped success payload otherwise.
    """
    result = await drafting_tools.export_docx(case_number)
    parsed = json.loads(result)  # GAP-48: tool returns the {status,data,message} envelope
    # FU-6: a QA gate (or another error) can block the export. export_docx
    # signals this with envelope status == "error". Returning the existing 200
    # here would let the UI show a false "exported successfully" toast, so we map
    # a block to 409 Conflict carrying the Hebrew message + failed_gates (in data).
    if parsed.get("status") == "error":
        detail = {"message": parsed.get("message", "ייצוא נחסם.")}
        inner = parsed.get("data") or {}
        if isinstance(inner, dict) and inner.get("failed_gates"):
            detail["failed_gates"] = inner["failed_gates"]
        raise HTTPException(409, detail)
    data = envelope_unwrap(parsed)  # success payload: {path, active_draft_path, message}
    # Only dict payloads can be inspected for a filename; anything else is
    # returned untouched and simply schedules no webhook.
    fields = data if isinstance(data, dict) else {}
    # Notify the Paperclip plugin to attach the final-decision document.
    docx_filename = (
        fields.get("filename")
        or fields.get("docx_filename")
        or fields.get("file")
        or ""
    )
    if not docx_filename:
        # The documented success payload carries a path rather than a bare
        # filename — derive the name from it so the webhook actually fires.
        exported_path = fields.get("path") or fields.get("active_draft_path")
        if isinstance(exported_path, str) and exported_path:
            docx_filename = Path(exported_path).name
    if docx_filename:
        prefix = case_number[:1]
        company_id = (
            PAPERCLIP_COMPANIES["licensing"] if prefix == "1"
            else PAPERCLIP_COMPANIES["betterment"] if prefix in ("8", "9")
            else None
        )
        background_tasks.add_task(
            paperclip_api.emit_export_complete_webhook,
            case_number=case_number,
            docx_filename=docx_filename,
            company_id=company_id,
        )
    return data
@app.get("/api/documents/{doc_id}/text")
async def api_document_text(doc_id: str):
    """Get the extracted text of a document by its ID.

    Responds 400 when ``doc_id`` is not a valid UUID and 404 when the lookup
    returns no text.
    """
    try:
        document_uuid = UUID(doc_id)
    except ValueError:
        raise HTTPException(400, f"Invalid document ID: {doc_id}")
    if not (text := await db.get_document_text(document_uuid)):
        raise HTTPException(404, f"Document {doc_id} not found or has no text")
    return {"doc_id": doc_id, "text": text}
# ── Integration Endpoints — Gitea & Paperclip ────────────────────
# Hebrew filename stem for each known document type; generate_doc_filename
# falls back to the raw doc_type string for types not listed here.
DOC_TYPE_NAMES = {
    "appeal": "כתב-ערר",
    "response": "תשובת",
    "protocol": "פרוטוקול-דיון",
    "plan": "תכנית",
    "decision": "החלטה",
    "court_decision": "פסק-דין",
    "permit": "היתר",
    "appraisal": "שומה",
    "exhibit": "נספח",
    "objection": "התנגדות",
    "reference": "מסמך-עזר",
}


def generate_doc_filename(doc_type: str, case_number: str, party_name: str = "", ext: str = ".pdf") -> str:
    """Generate a clear Hebrew filename for a document.

    The name is ``<type>[-<party>]-<case_number><ext>``. ``<type>`` comes from
    ``DOC_TYPE_NAMES`` (the raw ``doc_type`` when unmapped). The party name
    keeps only word characters, the Hebrew block and whitespace; every run of
    whitespace (spaces, tabs, newlines) then becomes a single hyphen. A party
    name with nothing left after sanitizing is omitted, so no empty segment
    appears in the result.
    """
    parts = [DOC_TYPE_NAMES.get(doc_type, doc_type)]
    if party_name:
        safe_party = re.sub(r"[^\w\u0590-\u05FF\s]", "", party_name)
        # Collapse ALL whitespace runs: replacing only " " would leave tabs and
        # newlines inside the filename and turn double spaces into "--".
        safe_party = re.sub(r"\s+", "-", safe_party.strip())
        if safe_party:
            parts.append(safe_party)
    parts.append(case_number)
    return "-".join(parts) + ext
# Request body for POST /api/integrations/gitea/create-repo; the three fields
# are passed positionally to create_repo(case_number, title, description).
class GiteaRepoRequest(BaseModel):
    # Case number; also used to locate the local case directory to push.
    case_number: str
    title: str
    description: str = ""
@app.post("/api/integrations/gitea/create-repo")
async def api_gitea_create_repo(req: GiteaRepoRequest):
    """Create a Gitea repo in the 'cases' org and link it to the local case directory.

    Any failure from ``create_repo`` is reported as HTTP 502. When the local
    case directory exists it is wired to the new remote and pushed; the
    response's ``pushed`` flag carries the result of that step.
    """
    try:
        repo = await create_repo(req.case_number, req.title, req.description)
    except Exception as exc:
        raise HTTPException(502, f"Gitea error: {exc}")

    # Prefer the clone URL; fall back to the web URL when it is absent/empty.
    clone_url = repo.get("clone_url") or repo.get("html_url", "")
    local_dir = config.find_case_dir(req.case_number)
    pushed = setup_remote_and_push(local_dir, clone_url) if local_dir.exists() else False
    return {
        "repo_url": repo.get("html_url", ""),
        "clone_url": clone_url,
        "pushed": pushed,
    }
# Request body for POST /api/integrations/paperclip/create-project; every
# field is forwarded by keyword to pc_create_project.
class PaperclipProjectRequest(BaseModel):
    case_number: str
    title: str
    description: str = ""
    # Appeal type label, in Hebrew (the default presumably means "licensing" —
    # confirm against pc_create_project).
    appeal_type: str = "רישוי"
@app.post("/api/integrations/paperclip/create-project")
async def api_paperclip_create_project(req: PaperclipProjectRequest):
    """Create a project in Paperclip's embedded DB.

    Returns whatever ``pc_create_project`` returns; any exception it raises is
    reported as HTTP 502.
    """
    project_args = {
        "case_number": req.case_number,
        "title": req.title,
        "description": req.description,
        "appeal_type": req.appeal_type,
    }
    try:
        return await pc_create_project(**project_args)
    except Exception as exc:
        raise HTTPException(502, f"Paperclip error: {exc}")
@app.post("/api/cases/{case_number}/start-workflow")
async def api_start_workflow(case_number: str):
    """Start the CEO agent workflow for a case.

    Creates a workflow issue in Paperclip and wakes the CEO agent.
    Only works when case status is 'new' or 'documents_ready'.

    Raises:
        HTTPException 404: the case lookup returned an error envelope, or
            issue creation raised ``ValueError``.
        HTTPException 409: the case is in a status that cannot start a workflow.
        HTTPException 502: Paperclip issue creation or the CEO wakeup failed.
    """
    # 1. Verify case exists and status is appropriate
    case_raw = await cases_tools.case_get(case_number)
    case_env = json.loads(case_raw)  # GAP-48
    if isinstance(case_env, dict) and case_env.get("status") == "error":
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_data = envelope_unwrap(case_env)
    status = case_data.get("status", "")
    # A tuple (not a set) so the statuses always appear in the same order in
    # the error message; set iteration order for strings varies per process.
    allowed = ("new", "documents_ready")
    if status not in allowed:
        raise HTTPException(
            409,
            f"לא ניתן להתחיל תהליך — סטטוס נוכחי: {status}. נדרש: {', '.join(allowed)}",
        )
    # 2. Create workflow issue in Paperclip
    try:
        issue = await pc_create_workflow_issue(case_number, case_data.get("title", ""))
    except ValueError as e:
        raise HTTPException(404, str(e))
    except Exception as e:
        raise HTTPException(502, f"שגיאת Paperclip: {e}")
    # 3. Wake the CEO agent — must succeed before marking case as processing
    try:
        wakeup = await pc_wake_ceo(issue["issue_id"], case_number, issue.get("company_id", ""))
    except Exception as e:
        logger.error("CEO wakeup failed for case %s: %s", case_number, e)
        raise HTTPException(
            502,
            f"נוצר issue {issue['identifier']} אך עירור ה-CEO נכשל: {e}. ניתן לנסות שנית.",
        )
    # 4. Update case status to processing (only after wakeup confirmed)
    await cases_tools.case_update(case_number, status="processing")
    return {
        "case_number": case_number,
        "status": "processing",
        "issue_id": issue["issue_id"],
        "issue_identifier": issue["identifier"],
        "project_url": issue["project_url"],
        "wakeup": wakeup,
    }
# ── Agent Activity Mirror ─────────────────────────────────────────
@app.get("/api/cases/{case_number}/agents")
async def api_case_agents(case_number: str):
    """Get all Paperclip agent activity for a case: issues, comments, interactions, agent status.

    With no issues for the case, every list in the response is empty.
    Otherwise comments, agents and interactions are fetched concurrently;
    the company id is taken from the first issue.
    """
    case_issues = await pc_get_case_issues(case_number)
    if not case_issues:
        return {"issues": [], "comments": [], "agents": [], "interactions": []}

    ids = [issue["id"] for issue in case_issues]
    owner_company = case_issues[0]["company_id"]
    fetched = await asyncio.gather(
        pc_get_issue_comments(ids),
        pc_get_agents_for_case(owner_company, ids),
        pc_get_issue_interactions(ids),
    )
    # gather() preserves argument order, so the results line up with these keys.
    return {
        "issues": case_issues,
        **dict(zip(("comments", "agents", "interactions"), fetched)),
    }
# Request body for POST /api/cases/{case_number}/agents/comment.
class AgentCommentRequest(BaseModel):
    # Comment text posted to the issue.
    body: str
    # Target issue; when omitted the endpoint picks one of the case's issues.
    issue_id: str | None = None
@app.post("/api/cases/{case_number}/agents/comment")
async def api_post_agent_comment(case_number: str, req: AgentCommentRequest):
    """Post a comment on a Paperclip issue linked to a case.

    With ``issue_id`` given, the issue must belong to the case (404
    otherwise). Without it, the last issue whose status is not ``done`` is
    used, falling back to the last issue overall. The response is the result
    of ``pc_post_comment`` plus the target's ``issue_identifier``.
    """
    issues = await pc_get_case_issues(case_number)
    if not issues:
        raise HTTPException(404, f"לא נמצא פרויקט Paperclip לתיק {case_number}")

    if req.issue_id:
        target = None
        for candidate in issues:
            if candidate["id"] == req.issue_id:
                target = candidate
                break
        if not target:
            raise HTTPException(404, f"Issue {req.issue_id} לא שייך לתיק {case_number}")
    else:
        # Last non-done issue if there is one, otherwise the last issue.
        open_issues = [i for i in issues if i["status"] != "done"]
        target = (open_issues or issues)[-1]

    posted = await pc_post_comment(target["id"], target["company_id"], req.body)
    # Echo the issue identifier back for the caller.
    posted["issue_identifier"] = target.get("identifier", "")
    return posted
# Request body for POST /api/cases/{case_number}/agents/interaction-response.
class InteractionResponseRequest(BaseModel):
    # Issue the interaction belongs to; must be one of the case's issues.
    issue_id: str
    interaction_id: str
    # Selects which Paperclip handler receives the payload.
    action: Literal["respond", "accept", "reject"]
    # Passed through unchanged to the selected handler.
    payload: dict[str, Any]
@app.post("/api/cases/{case_number}/agents/interaction-response")
async def api_post_interaction_response(
    case_number: str, req: InteractionResponseRequest,
):
    """Submit a user's answer to a Paperclip issue-thread interaction.

    Routes to the respond / accept / reject handler based on ``action``
    (Paperclip auto-wakes the issue assignee after a successful submission).
    Responds 404 when the issue is not one of the case's issues. An upstream
    HTTP error is relayed with its status code and the first 500 characters
    of its body; any other failure becomes 502.
    """
    issues = await pc_get_case_issues(case_number)
    if all(issue["id"] != req.issue_id for issue in issues):
        raise HTTPException(404, f"Issue {req.issue_id} לא שייך לתיק {case_number}")

    dispatch = {
        "respond": pc_respond_to_interaction,
        "accept": pc_accept_interaction,
        "reject": pc_reject_interaction,
    }
    try:
        handler = dispatch[req.action]
        return await handler(req.issue_id, req.interaction_id, req.payload)
    except httpx.HTTPStatusError as upstream:
        detail = (upstream.response.text or "")[:500]
        raise HTTPException(upstream.response.status_code, detail)
    except Exception as exc:
        raise HTTPException(502, f"שגיאת Paperclip: {exc}")
# ── Settings: MCP Server Configuration ────────────────────────────
#
# Source of truth for legal-ai env vars is Coolify (see memory:
# reference_legal_ai_env_architecture). The container's os.environ is
# populated by Coolify at startup. We read & write through the Coolify
# API. Drift = (Coolify env value != container os.environ value), which
# means a Coolify update was made without a redeploy.
# Module-level guard: minimum interval between redeploys (60 seconds).
# Prevents accidental double-clicks or automated retry loops from queueing
# multiple redundant Coolify builds.
# _LAST_REDEPLOY_AT is the time.time() timestamp of the most recent redeploy
# recorded by api_mcp_env_redeploy (0.0 until the first one). Being a plain
# module global, it is per-process and resets on restart.
_LAST_REDEPLOY_AT: float = 0.0
_REDEPLOY_MIN_INTERVAL_SEC: float = 60.0
def _coolify_ctx() -> tuple[str, str, str]:
"""Return (base_url, app_uuid, token). Token may be empty."""
return (
os.environ.get("COOLIFY_URL", "https://coolify.nautilus.marcusgroup.org"),
os.environ.get("COOLIFY_APP_UUID", "gyjo0mtw2c42ej3xxvbz8zio"),
os.environ.get("COOLIFY_API_TOKEN", ""),
)
async def _read_coolify_envs() -> tuple[dict[str, list[dict[str, Any]]], list[str]]:
    """Read env vars from Coolify API.

    Returns ``(grouped_by_key, errors)``. ``grouped_by_key`` maps each env key
    found in ``ENV_CATALOG`` to the list of raw Coolify entries for that key
    (Coolify may hold duplicates per key, e.g. build-time vs runtime — all
    are surfaced). On any failure the mapping is empty and ``errors`` holds a
    single machine-readable code.
    """
    base_url, app_uuid, token = _coolify_ctx()
    if not token:
        return {}, ["coolify_token_missing"]

    endpoint = f"{base_url}/api/v1/applications/{app_uuid}/envs"
    auth = {"Authorization": f"Bearer {token}"}
    try:
        async with httpx.AsyncClient(timeout=20.0) as http:
            resp = await http.get(endpoint, headers=auth)
    except Exception as exc:
        logger.warning("coolify_envs_unreachable: %s", exc)
        return {}, ["coolify_unreachable"]

    if resp.status_code >= 400:
        logger.warning(
            "coolify_envs_failed status=%s body=%s",
            resp.status_code, (resp.text or "")[:200],
        )
        return {}, ["coolify_envs_failed"]

    try:
        payload = resp.json()
    except Exception as exc:
        logger.warning("coolify_envs_parse_failed: %s", exc)
        return {}, ["coolify_envs_parse_failed"]

    # Anything other than a JSON array is treated as "no entries".
    entries = payload if isinstance(payload, list) else []
    grouped: dict[str, list[dict[str, Any]]] = {}
    for entry in entries:
        key = entry.get("key")
        if key and key in ENV_CATALOG:
            grouped.setdefault(key, []).append(entry)
    return grouped, []
def _coolify_authoritative_value(entries: list[dict[str, Any]]) -> str | None:
"""Pick the authoritative value when Coolify has multiple entries for a key.
Strategy: if all entries have the same value, return it. If they
differ, return the LAST one (Coolify's own runtime injection order
treats later definitions as overrides) and log a warning so the
UI can display the conflict.
"""
if not entries:
return None
values = {e.get("value") for e in entries}
if len(values) > 1:
logger.warning(
"coolify_env_duplicate_conflict key=%s values=%s",
entries[0].get("key"),
[str(v)[:20] for v in values],
)
return entries[-1].get("value")
def _build_env_var_row(
    spec: EnvSpec,
    coolify_entries: list[dict[str, Any]],
    container_value: str | None,
    coolify_available: bool,
) -> dict[str, Any]:
    """Build a single response row for an env var.

    The row is ``spec.to_public_dict()`` extended with:

    * ``coolify_value`` — authoritative Coolify value (masked for secrets),
      or ``None`` when Coolify could not be read.
    * ``container_value`` — what this process sees in ``os.environ``
      (masked for secrets).
    * ``drift`` — whether the two differ (common cause: Coolify env updated
      without a redeploy). Always ``False`` when Coolify is unavailable or
      when both sides are ``None``.
    * ``has_duplicates`` — more than one Coolify entry exists for the key.
    """
    authoritative = _coolify_authoritative_value(coolify_entries)

    if not coolify_available:
        # Drift cannot be assessed without Coolify; show the container side only.
        shown_coolify: str | None = None
        shown_container: str | None = container_value
        if spec.is_secret and container_value:
            shown_container = mask_secret(container_value)
        drifted = False
    elif spec.is_secret:
        shown_coolify = mask_secret(authoritative) if authoritative else None
        shown_container = mask_secret(container_value) if container_value else None
        # Secrets are compared raw, with missing/empty treated as "".
        drifted = (authoritative or "") != (container_value or "")
    else:
        shown_coolify = authoritative
        shown_container = container_value
        coolify_norm = normalize_for_compare(spec, authoritative)
        container_norm = normalize_for_compare(spec, container_value)
        drifted = coolify_norm != container_norm

    # Unset on both sides is never drift, whatever the branch above decided.
    if authoritative is None and container_value is None:
        drifted = False

    row = spec.to_public_dict()
    row.update(
        coolify_value=shown_coolify,
        container_value=shown_container,
        drift=drifted,
        has_duplicates=len(coolify_entries) > 1,
    )
    return row
@app.get("/api/settings/mcp/env")
async def api_mcp_env():
    """List all catalog env vars with Coolify (authoritative) + container values.

    One row per ``ENV_CATALOG`` entry, plus the Coolify app UUID and the
    error codes (if any) from reading Coolify. Any error disables drift
    detection for every row.
    """
    coolify_envs, errors = await _read_coolify_envs()
    app_uuid = _coolify_ctx()[1]
    readable = not errors
    rows = [
        _build_env_var_row(
            spec,
            coolify_envs.get(key, []),
            os.environ.get(key),
            coolify_available=readable,
        )
        for key, spec in ENV_CATALOG.items()
    ]
    return {"vars": rows, "coolify_app_uuid": app_uuid, "errors": errors}
# Request body for PATCH /api/settings/mcp/env/{key}.
class McpEnvUpdateRequest(BaseModel):
    # New value in any JSON type; the endpoint coerces it against the key's
    # catalog spec before sending it to Coolify.
    value: Any
@app.patch("/api/settings/mcp/env/{key}")
async def api_mcp_env_update(key: str, req: McpEnvUpdateRequest):
    """Update a non-secret env var in Coolify. Requires redeploy to take effect.

    Raises:
        HTTPException 404: ``key`` is not in ``ENV_CATALOG``.
        HTTPException 400: the key is a secret, is read-only, or the value
            cannot be coerced to the key's spec.
        HTTPException 503: no Coolify API token is configured.
        HTTPException 502: Coolify is unreachable or rejected the update.
    """
    spec = ENV_CATALOG.get(key)
    if spec is None:
        raise HTTPException(404, f"Unknown env key: {key}")
    if spec.is_secret:
        raise HTTPException(400, f"Cannot edit secret: {key}")
    if not spec.is_editable:
        raise HTTPException(400, f"Read-only: {key}")
    try:
        coerced = coerce(spec, req.value)
    except ValueError as e:
        raise HTTPException(400, str(e)) from e
    # Booleans are stored lowercase ("true"/"false"); everything else via str().
    str_value = "true" if coerced is True else (
        "false" if coerced is False else str(coerced)
    )
    base_url, app_uuid, token = _coolify_ctx()
    if not token:
        raise HTTPException(503, "COOLIFY_API_TOKEN not configured")
    # Coolify's PATCH endpoint upserts by key (creates if not exists,
    # updates if exists). For keys with duplicates, this updates ALL
    # entries with that key — which is what we want.
    try:
        async with httpx.AsyncClient(timeout=15.0) as http:
            resp = await http.patch(
                f"{base_url}/api/v1/applications/{app_uuid}/envs",
                headers={"Authorization": f"Bearer {token}"},
                json={"key": key, "value": str_value},
            )
    except Exception as e:
        logger.exception("coolify_env_write_unreachable key=%s", key)
        raise HTTPException(502, f"Coolify unreachable: {e}") from e
    if resp.status_code >= 400:
        body_preview = (resp.text or "")[:200]
        logger.warning(
            "coolify_env_write_failed key=%s status=%s body=%s",
            key, resp.status_code, body_preview,
        )
        # Separate the status code from the body so the message is readable.
        raise HTTPException(
            502, f"Coolify update failed: {resp.status_code} {body_preview}"
        )
    logger.info("mcp_env_update key=%s value=%s", key, str_value)
    return {
        "ok": True,
        "key": key,
        "saved_value": str_value,
        "requires_redeploy": True,
        "message": "נשמר ב-Coolify. נדרש redeploy כדי שהקונטיינר יקרא את הערך החדש.",
    }
@app.post("/api/settings/mcp/env/redeploy")
async def api_mcp_env_redeploy():
    """Trigger Coolify redeploy of the legal-ai app.

    Rate-limited to one trigger per ``_REDEPLOY_MIN_INTERVAL_SEC``. The slot
    is claimed BEFORE the Coolify call is awaited, so overlapping requests
    (double-clicks) cannot all pass the guard while the first call is still
    in flight; the claim is released again if the trigger fails.

    Raises:
        HTTPException 429: a redeploy was triggered too recently.
        HTTPException 503: no Coolify API token is configured.
        HTTPException 502: Coolify is unreachable or rejected the deploy.
    """
    global _LAST_REDEPLOY_AT
    now = time.time()
    elapsed = now - _LAST_REDEPLOY_AT
    if elapsed < _REDEPLOY_MIN_INTERVAL_SEC:
        wait = int(_REDEPLOY_MIN_INTERVAL_SEC - elapsed)
        raise HTTPException(
            429, f"Redeploy בהמתנה: נסה שוב בעוד {wait} שניות."
        )
    base_url, app_uuid, token = _coolify_ctx()
    if not token:
        raise HTTPException(503, "COOLIFY_API_TOKEN not configured")

    # Claim the slot before the first await; remember the old value so a
    # failed trigger does not lock the user out for a full interval.
    previous = _LAST_REDEPLOY_AT
    _LAST_REDEPLOY_AT = now
    try:
        async with httpx.AsyncClient(timeout=30.0) as http:
            try:
                resp = await http.post(
                    f"{base_url}/api/v1/deploy",
                    params={"uuid": app_uuid, "force": "false"},
                    headers={"Authorization": f"Bearer {token}"},
                )
            except Exception as e:
                raise HTTPException(502, f"Coolify unreachable: {e}") from e
        if resp.status_code >= 400:
            body_preview = (resp.text or "")[:200]
            raise HTTPException(
                502, f"Coolify deploy failed: {resp.status_code} {body_preview}"
            )
    except BaseException:
        # Release the claim only if no later request has overwritten it.
        if _LAST_REDEPLOY_AT == now:
            _LAST_REDEPLOY_AT = previous
        raise

    # The deploy was accepted; a malformed body must not turn that into a 500.
    try:
        data = resp.json() if resp.content else {}
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}
    deployments = data.get("deployments")
    first = (
        deployments[0]
        if isinstance(deployments, list) and deployments and isinstance(deployments[0], dict)
        else {}
    )
    deployment_uuid = data.get("deployment_uuid") or first.get("deployment_uuid")
    logger.info("mcp_env_redeploy triggered uuid=%s", deployment_uuid)
    return {
        "ok": True,
        "deployment_uuid": deployment_uuid,
        "message": "Redeploy הופעל. הקונטיינר יחזור תוך 2-4 דקות.",
    }
@app.get("/api/settings/mcp/tools")
async def api_mcp_tools():
    """List all MCP tools registered in legal_mcp.

    Returns ``{"tools": [...], "count": N}``; an introspection failure is
    logged with its traceback and reported as HTTP 500.
    """
    from web.mcp_introspection import list_mcp_tools

    try:
        discovered = await list_mcp_tools()
    except Exception as exc:
        logger.exception("mcp_tools_introspection_failed")
        raise HTTPException(500, f"Tools introspection failed: {exc}")
    return {"tools": discovered, "count": len(discovered)}
@app.get("/api/settings/mcp/registrations")
async def api_mcp_registrations():
    """List MCP server registrations from host config files.

    Thin wrapper: returns the result of ``list_registrations()`` unchanged.
    """
    from web.mcp_registrations import list_registrations

    registrations = list_registrations()
    return registrations
@app.get("/api/settings/mcp/blocks")
async def api_mcp_blocks():
    """List the 12-block decision schema (read-only reference).

    Each ``BLOCK_CONFIG`` entry is returned in ``index`` order together with
    two documentation-only annotations: its CREAC role and its JWM purpose.
    """
    from legal_mcp.services.block_writer import BLOCK_CONFIG

    # CREAC role per block (from docs/block-schema.md). Kept as a static map
    # here rather than in BLOCK_CONFIG so LLM generation config stays
    # decoupled from documentation metadata.
    creac_by_block = {
        "block-alef": None, "block-bet": None, "block-gimel": None,
        "block-dalet": None, "block-yod-bet": None,
        "block-he": "Conclusion (preview)",
        "block-vav": "Facts (R-context)",
        "block-zayin": "Arguments",
        "block-chet": "Procedural record",
        "block-tet": "Rule (R)",
        "block-yod": "C → R → E → A → C (full CREAC)",
        "block-yod-alef": "Conclusion (final)",
    }
    # JWM functional purpose (Federal Judicial Center mapping)
    jwm_by_block = {
        "block-alef": "Orientation", "block-bet": "Orientation",
        "block-gimel": "Orientation", "block-dalet": "Orientation",
        "block-he": "Orientation",
        "block-vav": "Framing", "block-zayin": "Argumentation",
        "block-chet": "Procedural record",
        "block-tet": "Deliberation (rules)",
        "block-yod": "Deliberation (analysis)",
        "block-yod-alef": "Disposition",
        "block-yod-bet": "Disposition (signatures)",
    }
    ordered = sorted(BLOCK_CONFIG.items(), key=lambda kv: kv[1]["index"])
    blocks = [
        {
            "id": block_id,
            "index": cfg["index"],
            "title": cfg["title"],
            "gen_type": cfg["gen_type"],
            "model": cfg["model"],
            "temperature": cfg.get("temp"),
            "max_tokens": cfg.get("max_tokens"),
            "creac_role": creac_by_block.get(block_id),
            "jwm_purpose": jwm_by_block.get(block_id),
        }
        for block_id, cfg in ordered
    ]
    return {"blocks": blocks, "count": len(blocks)}
# ── Settings: Tag → Company Mappings ──────────────────────────────
@app.get("/api/settings/paperclip-companies")
async def api_paperclip_companies():
    """List all companies from Paperclip's DB.

    Opens a dedicated connection, reads ``id, name, issue_prefix`` ordered by
    name, and always closes the connection. Any failure while connecting,
    querying or shaping rows is reported as HTTP 502.
    """
    pc_url = require_paperclip_db_url()  # INV-ENV4 / GAP-57: fail loud, no creds default
    try:
        conn = await asyncpg.connect(pc_url)
        try:
            records = await conn.fetch(
                "SELECT id, name, issue_prefix FROM companies ORDER BY name"
            )
            companies = []
            for rec in records:
                companies.append({
                    "id": str(rec["id"]),
                    "name": rec["name"],
                    "prefix": rec.get("issue_prefix", ""),
                })
            return companies
        finally:
            await conn.close()
    except Exception as exc:
        raise HTTPException(502, f"Cannot reach Paperclip DB: {exc}")
@app.get("/api/settings/tag-mappings")
async def api_get_tag_mappings():
    """Get all tag → company mappings.

    Returns every ``tag_company_mappings`` row as a plain dict, ordered by tag.
    """
    pool = await db.get_pool()
    query = "SELECT id, tag, tag_label, company_id, company_name, created_at FROM tag_company_mappings ORDER BY tag"
    records = await pool.fetch(query)
    return list(map(dict, records))
# Request body for POST /api/settings/tag-mappings. (tag, company_id) is the
# conflict target of the upsert; the label and company name are the columns
# refreshed when the pair already exists.
class TagMappingRequest(BaseModel):
    tag: str
    tag_label: str = ""
    company_id: str
    company_name: str = ""
@app.post("/api/settings/tag-mappings")
async def api_add_tag_mapping(req: TagMappingRequest):
    """Add a tag → company mapping.

    Upserts on ``(tag, company_id)``: an existing pair has its label and
    company name refreshed. Returns the stored row; any database error is
    reported as HTTP 400 with the error text.
    """
    pool = await db.get_pool()
    upsert_sql = """INSERT INTO tag_company_mappings (tag, tag_label, company_id, company_name)
               VALUES ($1, $2, $3, $4)
               ON CONFLICT (tag, company_id) DO UPDATE SET tag_label = $2, company_name = $4
               RETURNING id, tag, tag_label, company_id, company_name"""
    params = (req.tag, req.tag_label, req.company_id, req.company_name)
    try:
        stored = await pool.fetchrow(upsert_sql, *params)
        return dict(stored)
    except Exception as exc:
        raise HTTPException(400, str(exc))
@app.delete("/api/settings/tag-mappings/{mapping_id}")
async def api_delete_tag_mapping(mapping_id: str):
    """Delete a tag → company mapping.

    Raises:
        HTTPException 400: ``mapping_id`` is not a valid UUID.
        HTTPException 404: no mapping with that id exists.
    """
    # Validate up front: a malformed id would otherwise fail inside the
    # ``::uuid`` cast and surface as an unhandled 500.
    try:
        UUID(mapping_id)
    except ValueError:
        raise HTTPException(400, f"Invalid mapping ID: {mapping_id}")
    pool = await db.get_pool()
    result = await pool.execute("DELETE FROM tag_company_mappings WHERE id = $1::uuid", mapping_id)
    if result == "DELETE 0":
        raise HTTPException(404, "Mapping not found")
    return {"ok": True}
# ── Methodology Settings ───────────────────────────────────────────
from legal_mcp.services.lessons import (
GOLDEN_RATIOS,
DISCUSSION_RULES,
CONTENT_CHECKLISTS,
ANTI_PATTERNS,
TRANSITION_PHRASES,
)
def _transition_phrases_by_bucket() -> dict[str, list[str]]:
    """Group TRANSITION_PHRASES into editable buckets by outcome (None→universal).

    Each entry's ``phrase`` is appended to the bucket named by its
    ``outcome``; entries with a missing or empty outcome go to
    ``"universal"``. Order within a bucket follows ``TRANSITION_PHRASES``.
    """
    buckets: dict[str, list[str]] = {}
    for entry in TRANSITION_PHRASES:
        name = entry.get("outcome") or "universal"
        if name not in buckets:
            buckets[name] = []
        buckets[name].append(entry["phrase"])
    return buckets
# Hardcoded methodology defaults, keyed by category and then by rule key.
# The /api/methodology endpoints serve these values, overlay DB overrides on
# top of them, and use the key sets to validate incoming category/key names.
_METHODOLOGY_DEFAULTS: dict[str, dict] = {
    # Copied with each section's value converted to a list.
    "golden_ratios": {k: {s: list(v) for s, v in sec.items()} for k, sec in GOLDEN_RATIOS.items()},
    # Shallow copies: the top-level dicts are new, their values are shared.
    "discussion_rules": dict(DISCUSSION_RULES),
    "content_checklists": dict(CONTENT_CHECKLISTS),
    # T12 — editable abstract-profile categories the writer (T15) + metric (T7) consume.
    "transition_phrases": _transition_phrases_by_bucket(),
    # Re-keyed by anti-pattern name, keeping only the regex and note fields.
    "anti_patterns": {ap["name"]: {"regex": ap["regex"], "note": ap["note"]} for ap in ANTI_PATTERNS},
}
# Category names accepted in the /api/methodology/{category} path segment.
_VALID_CATEGORIES = set(_METHODOLOGY_DEFAULTS.keys())
@app.get("/api/methodology/{category}")
async def api_get_methodology(category: str):
    """Get methodology settings with DB overrides merged over defaults.

    For every default key in the category the response holds
    ``{value, is_override, updated_at}``: the ``_global`` override from
    ``appeal_type_rules`` when one exists, otherwise the hardcoded default.
    Override rows whose key is not a default key are ignored. Responds 400
    for an unknown category.
    """
    if category not in _VALID_CATEGORIES:
        raise HTTPException(400, f"Unknown category: {category}. Valid: {sorted(_VALID_CATEGORIES)}")

    pool = await db.get_pool()
    rows = await pool.fetch(
        "SELECT rule_key, rule_value, created_at FROM appeal_type_rules "
        "WHERE appeal_type = '_global' AND rule_category = $1",
        category,
    )
    overrides = {row["rule_key"]: row for row in rows}

    items = {}
    for key, default_val in _METHODOLOGY_DEFAULTS[category].items():
        override = overrides.get(key)
        if override is None:
            items[key] = {"value": default_val, "is_override": False, "updated_at": None}
            continue
        value = override["rule_value"]
        # asyncpg hands JSONB back as raw JSON text when no codec is
        # registered; decode it so the frontend receives the real type.
        # Text that is not valid JSON is passed through unchanged.
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except (json.JSONDecodeError, TypeError):
                pass
        stamp = override["created_at"]
        items[key] = {
            "value": value,
            "is_override": True,
            "updated_at": stamp.isoformat() if stamp else None,
        }
    return {"items": items}
# Request body for PUT /api/methodology/{category}/{key}.
class MethodologyUpdateRequest(BaseModel):
    # Override value; the required shape depends on the category and is
    # checked by the endpoint before the value is stored as JSON.
    value: Any
@app.put("/api/methodology/{category}/{key}")
async def api_update_methodology(category: str, key: str, req: MethodologyUpdateRequest):
    """Upsert a methodology override. Validates value shape per category.

    Accepted shapes:
      * ``golden_ratios`` — dict of section → ``[min, max]`` with real
        numbers (booleans rejected) satisfying ``0 <= min <= max <= 100``.
      * ``discussion_rules`` / ``transition_phrases`` — list of non-empty strings.
      * ``content_checklists`` — non-empty string.
      * ``anti_patterns`` — dict with a non-empty ``regex`` string that
        compiles, and an optional string ``note``.

    Raises:
        HTTPException 400: unknown category or key.
        HTTPException 422: the value does not match the category's shape.
    """
    if category not in _VALID_CATEGORIES:
        raise HTTPException(400, f"Unknown category: {category}")
    if key not in _METHODOLOGY_DEFAULTS[category]:
        raise HTTPException(400, f"Unknown key '{key}' for category '{category}'")
    value = req.value
    # Validate value shape
    if category == "golden_ratios":
        if not isinstance(value, dict):
            raise HTTPException(422, "golden_ratios value must be a dict of section → [min, max]")
        for sec, rng in value.items():
            well_formed = (
                isinstance(rng, list)
                and len(rng) == 2
                # bool is a subclass of int — exclude it explicitly.
                and all(isinstance(x, (int, float)) and not isinstance(x, bool) for x in rng)
                # Enforce the bounds and ordering the error message promises.
                and 0 <= rng[0] <= rng[1] <= 100
            )
            if not well_formed:
                raise HTTPException(422, f"Section '{sec}' must be [min, max] (integers 0-100)")
    elif category in ("discussion_rules", "transition_phrases"):
        if not isinstance(value, list) or not all(isinstance(s, str) and s.strip() for s in value):
            raise HTTPException(422, f"{category} value must be a list of non-empty strings")
    elif category == "content_checklists":
        if not isinstance(value, str) or not value.strip():
            raise HTTPException(422, "content_checklists value must be a non-empty string")
    elif category == "anti_patterns":
        pattern = value.get("regex") if isinstance(value, dict) else None
        if not isinstance(pattern, str) or not pattern.strip():
            raise HTTPException(422, "anti_patterns value must be a dict with a non-empty 'regex' string")
        try:
            re.compile(pattern)
        except re.error as e:
            # A pattern that does not compile would break every later consumer.
            raise HTTPException(422, f"anti_patterns regex does not compile: {e}") from e
        if not isinstance(value.get("note", ""), str):
            raise HTTPException(422, "anti_patterns 'note' must be a string")
    pool = await db.get_pool()
    # json.dumps → text, then PostgreSQL casts text→jsonb.
    # Passing a Python list directly causes "expected str, got list" in asyncpg;
    # passing a str with ::jsonb causes double-encoding (stored as JSONB string).
    # ::text::jsonb bypasses asyncpg's codec and lets PostgreSQL parse the JSON.
    await pool.execute(
        "INSERT INTO appeal_type_rules (id, appeal_type, rule_category, rule_key, rule_value) "
        "VALUES (gen_random_uuid(), '_global', $1, $2, $3::text::jsonb) "
        "ON CONFLICT (appeal_type, rule_category, rule_key) DO UPDATE SET rule_value = $3::text::jsonb",
        category, key, json.dumps(value, ensure_ascii=False),
    )
    return {"key": key, "value": value, "is_override": True}
@app.delete("/api/methodology/{category}/{key}")
async def api_reset_methodology(category: str, key: str):
    """Drop the ``_global`` override for a key so the built-in default applies again.

    Returns the default value with ``is_override`` set to False. Responds 400
    when the category or key is not a known methodology entry.
    """
    if category not in _VALID_CATEGORIES:
        raise HTTPException(400, f"Unknown category: {category}")
    defaults_for_category = _METHODOLOGY_DEFAULTS[category]
    if key not in defaults_for_category:
        raise HTTPException(400, f"Unknown key '{key}' for category '{category}'")

    delete_sql = (
        "DELETE FROM appeal_type_rules WHERE appeal_type = '_global' AND rule_category = $1 AND rule_key = $2"
    )
    pool = await db.get_pool()
    await pool.execute(delete_sql, category, key)

    return {"key": key, "value": _METHODOLOGY_DEFAULTS[category][key], "is_override": False}
# ── Style-acquisition learning surface (T6/T13) ────────────────────
@app.get("/api/learning/pairs")
async def api_learning_pairs(status: str = "", limit: int = 200):
    """List the draft/final reconciliation ledger (INV-LRN4).

    Every decision with the status of its comparison against the final text.

    Args:
        status: Optional filter: final_received / analyzed / lessons_folded.
            An empty string means no filter.
        limit: Maximum number of rows requested from the DB layer.

    Returns:
        ``{"items": [...], "count": n}``.
    """
    rows = await db.list_draft_final_pairs(status=status or None, limit=limit)

    def _iso(ts):
        return ts.isoformat() if ts else None

    items = []
    for r in rows:
        ds = r.get("diff_stats")
        if isinstance(ds, str):
            try:
                ds = json.loads(ds)
            except (json.JSONDecodeError, TypeError):
                ds = None
        # diff_stats is free-form JSON; only a dict can carry change_percent.
        change_percent = ds.get("change_percent") if isinstance(ds, dict) else None
        items.append({
            "id": str(r["id"]),
            "case_id": str(r["case_id"]) if r.get("case_id") else None,
            "case_number": r.get("case_number") or "",
            "title": r.get("title") or "",
            "status": r.get("status") or "",
            "change_percent": change_percent,
            "created_at": _iso(r.get("created_at")),
            "updated_at": _iso(r.get("updated_at")),
        })
    return {"items": items, "count": len(items)}
@app.get("/api/learning/style-distance/{case_number}")
async def api_learning_style_distance(case_number: str):
    """Style-distance metric (T7) for a case — is the draft converging toward Dafna.

    Delegates to ``legal_mcp.services.style_distance.style_distance`` and
    returns its result unchanged.
    """
    # Imported lazily, at request time, exactly as the service is only needed here.
    from legal_mcp.services import style_distance as distance_service

    report = await distance_service.style_distance(case_number)
    return report
def _coerce_json(raw):
if isinstance(raw, str):
try:
return json.loads(raw)
except (json.JSONDecodeError, TypeError):
return None
return raw
@app.get("/api/learning/pairs/{pair_id}")
async def api_learning_pair_detail(pair_id: str):
    """Detail of one ledger row, including the distillation proposal (analysis)
    awaiting the chair's approval (T14).

    Raises:
        HTTPException: 400 when ``pair_id`` is not a UUID, 404 when no such
            pair exists.
    """
    try:
        pid = UUID(pair_id)
    except ValueError:
        raise HTTPException(400, "pair_id לא תקין") from None
    p = await db.get_draft_final_pair(pid)
    if not p:
        raise HTTPException(404, "לא נמצא")

    # The stored analysis is free-form JSON; tolerate a missing, null or
    # non-object payload instead of failing the whole request.
    analysis = _coerce_json(p.get("analysis"))
    if not isinstance(analysis, dict):
        analysis = {}
    # surface only style_method changes (INV-LRN5 — substance never enters the voice)
    changes = [
        c for c in (analysis.get("changes") or [])
        if isinstance(c, dict) and c.get("domain") == "style_method"
    ]
    return {
        "id": str(p["id"]),
        "case_number": p.get("case_number") or "",
        "title": p.get("title") or "",
        "status": p.get("status") or "",
        "diff_stats": _coerce_json(p.get("diff_stats")),
        "overall_assessment": analysis.get("overall_assessment", ""),
        "changes": changes,
        "new_expressions": analysis.get("new_expressions") or [],
    }
class PromoteLearningRequest(BaseModel):
    """Body of the promote endpoint: the learnings the chair approved for folding."""

    # Both lists default to empty, so a request may promote only lessons,
    # only phrases, or neither.
    lessons: list[str] = []  # style_method lessons → discussion_rules['universal']
    phrases: list[str] = []  # new transition phrases → transition_phrases['universal']
async def _append_methodology_override(category: str, key: str, items: list[str]) -> None:
    """Append new items to a list-valued methodology entry and upsert the override.

    Reads the current value (the ``_global`` override when present, otherwise
    the hardcoded default), appends every item that is not already there, and
    writes the merged list back as the override. Shared by the T14 approval
    gate to fold approved learnings into writer-consumed channels.

    Args:
        category: Methodology category (``rule_category`` column).
        key: Entry key within the category (``rule_key`` column).
        items: Candidate strings. Blank/whitespace-only entries and duplicates
            (of existing entries or of each other) are skipped.
    """
    pool = await db.get_pool()
    row = await pool.fetchrow(
        "SELECT rule_value FROM appeal_type_rules "
        "WHERE appeal_type = '_global' AND rule_category = $1 AND rule_key = $2",
        category, key,
    )
    if row:
        current = _coerce_json(row["rule_value"]) or []
    else:
        current = _METHODOLOGY_DEFAULTS.get(category, {}).get(key, [])
    # Only a real list can be appended to. Calling list() on a string default
    # would explode it into single characters, so anything else starts empty.
    merged = list(current) if isinstance(current, (list, tuple)) else []

    for s in items:
        # Skip blanks (the PUT endpoint rejects them for discussion_rules) and
        # check against `merged` so repeats inside `items` collapse too.
        if isinstance(s, str) and s.strip() and s not in merged:
            merged.append(s)

    # NOTE: read-then-write is not atomic; concurrent promotions can overwrite
    # each other's additions.
    await pool.execute(
        "INSERT INTO appeal_type_rules (id, appeal_type, rule_category, rule_key, rule_value) "
        "VALUES (gen_random_uuid(), '_global', $1, $2, $3::text::jsonb) "
        "ON CONFLICT (appeal_type, rule_category, rule_key) DO UPDATE SET rule_value = $3::text::jsonb",
        category, key, json.dumps(merged, ensure_ascii=False),
    )
@app.post("/api/learning/pairs/{pair_id}/promote")
async def api_learning_promote(pair_id: str, req: PromoteLearningRequest):
    """Chair gate (INV-G10/LRN1): approve style lessons and transition phrases
    from the distillation proposal and fold them into the channels the writer
    consumes (methodology overrides → T15). Advances the pair's status.

    Responds 400 for a malformed ``pair_id`` and 404 for an unknown pair.
    """
    try:
        pid = UUID(pair_id)
    except ValueError:
        raise HTTPException(400, "pair_id לא תקין")

    pair = await db.get_draft_final_pair(pid)
    if not pair:
        raise HTTPException(404, "לא נמצא")

    # Lessons first, then phrases; each goes to the 'universal' key of its channel.
    channels = (
        ("discussion_rules", req.lessons),
        ("transition_phrases", req.phrases),
    )
    for category, approved in channels:
        if approved:
            await _append_methodology_override(category, "universal", approved)

    await db.update_draft_final_pair(pid, status="lessons_folded")

    response = {"id": pair_id, "status": "lessons_folded"}
    response["folded_lessons"] = len(req.lessons)
    response["folded_phrases"] = len(req.phrases)
    return response
# ── Skill Management API ───────────────────────────────────────────
# Connection string for the Paperclip database, resolved once at import time.
# INV-ENV4 / GAP-57: no hard-coded credential default — fail loud if unset.
PAPERCLIP_DB_URL = require_paperclip_db_url()
# Paperclip skills directory. In the Coolify container this is bind-mounted from
# the host's ~/.paperclip/instances/default/skills (see PAPERCLIP_SKILLS_DIR env).
# Fallback to the host path for local/dev use.
PAPERCLIP_SKILLS_DIR = Path(
    os.environ.get(
        "PAPERCLIP_SKILLS_DIR",
        str(Path.home() / ".paperclip" / "instances" / "default" / "skills"),
    )
)
# Default company ID for skills; overridable via the PAPERCLIP_COMPANY_ID env var.
# Skills live under PAPERCLIP_SKILLS_DIR / <company id> / <slug>.
SKILLS_COMPANY_ID = os.environ.get("PAPERCLIP_COMPANY_ID", "42a7acd0-30c5-4cbd-ac97-7424f65df294")
@app.get("/api/admin/skills")
async def api_list_skills():
    """List installed Paperclip skills with DB sync status.

    Returns one entry per ``company_skills`` row (with whether its directory and
    SKILL.md exist on disk), followed by directories found on disk that have no
    DB row (flagged ``not_in_db``).

    Raises:
        HTTPException: 503 when the Paperclip database cannot be reached.
    """
    try:
        conn = await asyncpg.connect(PAPERCLIP_DB_URL, timeout=5)
        try:
            rows = await conn.fetch(
                "SELECT slug, name, length(markdown) as md_chars, file_inventory, updated_at "
                "FROM company_skills WHERE company_id = $1::uuid ORDER BY slug",
                SKILLS_COMPANY_ID,
            )
        finally:
            await conn.close()
    except (
        OSError,
        asyncpg.PostgresError,
        asyncpg.InterfaceError,
        TimeoutError,
        # Before Python 3.11 asyncio.TimeoutError is a distinct class, so the
        # connect timeout would otherwise escape as an unhandled 500.
        asyncio.TimeoutError,
    ) as e:
        logger.exception("Paperclip DB unreachable while listing skills")
        raise HTTPException(
            status_code=503,
            detail=f"Paperclip database unreachable: {type(e).__name__}: {e}",
        ) from e

    company_dir = PAPERCLIP_SKILLS_DIR / SKILLS_COMPANY_ID

    def _skill_md_size(skill_dir):
        """Size in bytes of the directory's SKILL.md, or None when absent."""
        skill_md = skill_dir / "SKILL.md"
        return skill_md.stat().st_size if skill_md.exists() else None

    skills = []
    for r in rows:
        slug = r["slug"]
        skill_dir = company_dir / slug
        inventory = r["file_inventory"]
        skills.append({
            "slug": slug,
            "name": r["name"],
            "db_markdown_chars": r["md_chars"],
            # file_inventory may come back as JSON text; decode it in that case.
            "file_inventory": json.loads(inventory) if isinstance(inventory, str) else inventory,
            "updated_at": r["updated_at"].isoformat() if r["updated_at"] else None,
            "disk_exists": skill_dir.exists(),
            "disk_skill_md_bytes": _skill_md_size(skill_dir),
        })

    # Also check for skills on disk that aren't in DB
    if company_dir.exists():
        db_slugs = {s["slug"] for s in skills}
        for d in sorted(company_dir.iterdir()):
            if d.is_dir() and d.name not in db_slugs:
                skills.append({
                    "slug": d.name,
                    "name": d.name,
                    "db_markdown_chars": 0,
                    "file_inventory": [],
                    "updated_at": None,
                    "disk_exists": True,
                    "disk_skill_md_bytes": _skill_md_size(d),
                    "not_in_db": True,
                })
    return skills
# ---------------------------------------------------------------------------
# Paperclip agents — read-only admin view (Task #29)
# ---------------------------------------------------------------------------
# Display order for the 7 agent roles (master+mirror pairs grouped by name).
# Matches the legal pipeline: CEO → analysis → research → writing → QA → export → proof.
# Names missing from this map sort after the known ones (the listing endpoint
# falls back to rank 99).
_AGENT_NAME_ORDER = {
    "עוזר משפטי": 1,
    "מנתח משפטי": 2,
    "חוקר תקדימים": 3,
    "כותב החלטה": 4,
    "בודק איכות": 5,
    "מייצא טיוטה": 6,
    "הגהת מסמכים": 7,
}
# Fields that should match between master (CMP) and mirror (CMPA). Drift = bug.
# `status` is intentionally excluded — it's runtime state (running/idle/paused),
# not config, and changes constantly.
# The names are keys of the flattened agent dict built by _shape_paperclip_agent.
_DRIFT_FIELDS = (
    "model",
    "effort",
    "timeoutSec",
    "maxTurnsPerRun",
    "desiredSkills",
    "instructionsBundleMode",
    "instructionsEntryFile",
    "graceSec",
    "cooldownSec",
    "wakeOnDemand",
    "maxConcurrentRuns",
    "budget_monthly_cents",
)
def _portable_skills(skills: list[str]) -> list[str]:
"""Return only the skills whose drift across companies is meaningful.
`local/*` skills carry per-install hashes (different IDs per company even
when the underlying skill is identical); `company/{cid}/*` skills are
scoped to a single company by construction. Both are expected to differ
between master and mirror — comparing them produces noise. Only
`paperclipai/*` (vendor-shipped) skills should match exactly.
"""
return sorted(s for s in skills if s.startswith("paperclipai/"))
def _shape_paperclip_agent(raw: dict, company_id: str, company_name: str) -> dict:
"""Flatten a Paperclip agent row into the shape the UI consumes."""
ac = raw.get("adapterConfig") or {}
rc = raw.get("runtimeConfig") or {}
hb = rc.get("heartbeat") or {}
skill_sync = ac.get("paperclipSkillSync") or {}
return {
"id": raw.get("id"),
"company_id": company_id,
"company_name": company_name,
"name": raw.get("name"),
"role": raw.get("role"),
"status": raw.get("status"),
"pause_reason": raw.get("pauseReason"),
"adapter_type": raw.get("adapterType"),
"model": ac.get("model"),
"effort": ac.get("effort"),
"timeoutSec": ac.get("timeoutSec"),
"maxTurnsPerRun": ac.get("maxTurnsPerRun"),
"desiredSkills": sorted(skill_sync.get("desiredSkills") or []),
"instructionsBundleMode": ac.get("instructionsBundleMode"),
"instructionsRootPath": ac.get("instructionsRootPath"),
"instructionsEntryFile": ac.get("instructionsEntryFile"),
"instructionsFilePath": ac.get("instructionsFilePath"),
"graceSec": hb.get("graceSec"),
"cooldownSec": hb.get("cooldownSec"),
"wakeOnDemand": hb.get("wakeOnDemand"),
"maxConcurrentRuns": hb.get("maxConcurrentRuns"),
"intervalSec": hb.get("intervalSec"),
"enabled": hb.get("enabled"),
"budget_monthly_cents": raw.get("budgetMonthlyCents"),
"spent_monthly_cents": raw.get("spentMonthlyCents"),
"last_heartbeat_at": raw.get("lastHeartbeatAt"),
"updated_at": raw.get("updatedAt"),
}
def _compute_drift(master: dict | None, mirror: dict | None) -> list[dict]:
if master is None or mirror is None:
return [{"field": "_pair_missing", "master": master is not None, "mirror": mirror is not None}]
drift = []
for field in _DRIFT_FIELDS:
m_val = master.get(field)
i_val = mirror.get(field)
if field == "desiredSkills":
m_val = _portable_skills(m_val or [])
i_val = _portable_skills(i_val or [])
if m_val != i_val:
drift.append({"field": field, "master": m_val, "mirror": i_val})
return drift
@app.get("/api/admin/paperclip-agents")
async def api_list_paperclip_agents():
    """List all Paperclip agents grouped into master+mirror pairs with drift detection.

    Read-only. Source of truth: Paperclip ``GET /api/companies/{id}/agents`` API
    (not direct DB) — keeps us decoupled from Paperclip's schema changes.

    Returns:
        ``{"pairs": [...], "companies": [...]}`` where each pair carries the
        master agent, the mirror agent and the list of drifting fields.

    Raises:
        HTTPException: 503 when the Paperclip API call fails, 502 when it
            answers with something other than a JSON list.
    """
    licensing_id = PAPERCLIP_COMPANIES["licensing"]
    company_labels = {
        licensing_id: "CMP — רישוי ובניה",
        PAPERCLIP_COMPANIES["betterment"]: "CMPA — היטלי השבחה",
    }

    def _slot(cid: str) -> str:
        # The licensing company is the master; the other company mirrors it.
        return "master" if cid == licensing_id else "mirror"

    by_name: dict[str, dict[str, dict]] = {}
    for cid, cname in company_labels.items():
        try:
            resp = await pc_request("GET", f"/api/companies/{cid}/agents", raise_on_error=True)
        except (httpx.HTTPError, RuntimeError) as e:
            logger.exception("Paperclip API failed for company %s", cid)
            raise HTTPException(
                status_code=503,
                detail=f"Paperclip API error for company {cname}: {type(e).__name__}: {e}",
            ) from e
        try:
            rows = resp.json()
        except ValueError as e:
            # A non-JSON body is an upstream fault, same as a non-list payload.
            raise HTTPException(status_code=502, detail=f"Unexpected Paperclip response for {cname}") from e
        if not isinstance(rows, list):
            raise HTTPException(status_code=502, detail=f"Unexpected Paperclip response for {cname}")
        slot = _slot(cid)  # loop-invariant for this company
        for raw in rows:
            if not isinstance(raw, dict):
                continue  # malformed entry; nothing to shape
            shaped = _shape_paperclip_agent(raw, cid, cname)
            by_name.setdefault(shaped["name"], {})[slot] = shaped

    pairs = []
    for name, group in by_name.items():
        master = group.get("master")
        mirror = group.get("mirror")
        primary = master or mirror
        pairs.append({
            "name": name,
            "role": primary.get("role") if primary else None,
            "master": master,
            "mirror": mirror,
            "drift": _compute_drift(master, mirror),
        })
    # An agent without a name would make the tie-breaker compare None with str.
    pairs.sort(key=lambda p: (_AGENT_NAME_ORDER.get(p["name"], 99), p["name"] or ""))
    return {
        "pairs": pairs,
        "companies": [
            {"id": cid, "label": label, "slot": _slot(cid)}
            for cid, label in company_labels.items()
        ],
    }
@app.post("/api/admin/skills/install")
async def api_install_skill(file: UploadFile = File(...)):
    """Install or update a Paperclip skill from a ZIP file.

    The ZIP should contain a SKILL.md at root (or in a single subdirectory).
    The skill slug is derived from the directory name or ZIP filename.

    The archive is fully validated (slug and every member path) before the
    existing skill directory is cleared, so a rejected upload never destroys an
    installed skill and no member can be written outside the skill directory.

    Returns:
        ``{"slug", "action", "files_extracted", "file_inventory", "markdown_chars"}``
        with ``action`` being ``"installed"`` or ``"updated"``.

    Raises:
        HTTPException: 400 for a missing/non-ZIP/oversized/invalid upload, an
            archive without SKILL.md, an unsafe slug or member path, or a
            SKILL.md that is not UTF-8; 500 if SKILL.md is missing after
            extraction.
    """
    if not file.filename:
        raise HTTPException(400, "No filename provided")
    if not file.filename.lower().endswith(".zip"):
        raise HTTPException(400, "Only ZIP files are supported")
    content = await file.read()
    if len(content) > 100 * 1024 * 1024:  # 100MB limit
        raise HTTPException(400, "File too large (max 100MB)")

    import io

    try:
        zf = zipfile.ZipFile(io.BytesIO(content))
    except zipfile.BadZipFile:
        raise HTTPException(400, "Invalid ZIP file") from None

    # `with` closes the archive on every exit path, including the 400s below.
    with zf:
        names = zf.namelist()

        # Find SKILL.md and determine the skill root (everything before it).
        skill_md_path = next((n for n in names if n.split("/")[-1] == "SKILL.md"), None)
        if not skill_md_path:
            raise HTTPException(400, "ZIP must contain a SKILL.md file")
        skill_root = skill_md_path[: -len("SKILL.md")]

        # Determine slug: from directory name in ZIP, or from ZIP filename
        if skill_root.strip("/"):
            slug = skill_root.strip("/").split("/")[0]
        else:
            slug = Path(file.filename).stem.lower()
            slug = re.sub(r"[^\w\-]", "-", slug).strip("-")
        # The slug becomes a directory name that is then wiped. An empty or
        # ".." slug would point the wipe at the whole skills tree.
        if not slug or slug in (".", "..") or "\\" in slug or "\x00" in slug:
            raise HTTPException(400, "Could not derive a safe skill slug from the ZIP")

        # Plan the extraction first so unsafe archives are rejected before any
        # existing file is touched.
        planned: list[tuple[str, str]] = []  # (archive member, path relative to skill root)
        for name in names:
            if name.endswith("/"):
                continue  # skip directories
            if not name.startswith(skill_root):
                continue  # skip files outside skill root
            rel_path = name[len(skill_root):]
            if not rel_path:
                continue
            # Skip macOS metadata
            if "/__MACOSX/" in name or rel_path.startswith("__MACOSX/") or rel_path.startswith("."):
                continue
            # Zip Slip guard: an absolute path or a ".." component would make
            # `skill_dir / rel_path` escape the skill directory.
            if rel_path.startswith("/") or ".." in rel_path.split("/"):
                raise HTTPException(400, f"Unsafe path in ZIP: {name}")
            planned.append((name, rel_path))

        # Extract to skill directory
        skill_dir = PAPERCLIP_SKILLS_DIR / SKILLS_COMPANY_ID / slug
        skill_dir.mkdir(parents=True, exist_ok=True)
        # Clear existing contents
        for item in skill_dir.rglob("*"):
            if item.is_file():
                item.unlink()
        # Remove empty subdirs (deepest first); non-empty ones are left alone.
        for item in sorted(skill_dir.rglob("*"), reverse=True):
            if item.is_dir():
                try:
                    item.rmdir()
                except OSError:
                    pass

        # Extract files, stripping the skill_root prefix
        extracted_files = []
        for name, rel_path in planned:
            dest = skill_dir / rel_path
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_bytes(zf.read(name))
            extracted_files.append(rel_path)

    # Read SKILL.md content
    skill_md_file = skill_dir / "SKILL.md"
    if not skill_md_file.exists():
        raise HTTPException(500, "SKILL.md was not extracted properly")
    try:
        markdown_content = skill_md_file.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        raise HTTPException(400, "SKILL.md must be UTF-8 encoded") from None

    # Build file_inventory
    file_inventory = []
    for rel in sorted(extracted_files):
        if rel == "SKILL.md":
            kind = "skill"
        elif rel.startswith("scripts/"):
            kind = "script"
        elif rel.startswith("references/"):
            kind = "reference"
        elif rel.endswith(".zip"):
            kind = "archive"
        else:
            kind = "resource"
        file_inventory.append({"kind": kind, "path": rel})

    # Update DB
    inventory_json = json.dumps(file_inventory, ensure_ascii=False)
    conn = await asyncpg.connect(PAPERCLIP_DB_URL)
    try:
        existing = await conn.fetchval(
            "SELECT id FROM company_skills WHERE company_id = $1::uuid AND slug = $2",
            SKILLS_COMPANY_ID, slug,
        )
        if existing:
            await conn.execute(
                """UPDATE company_skills
                   SET markdown = $1, file_inventory = $2::jsonb, updated_at = now()
                   WHERE id = $3""",
                markdown_content,
                inventory_json,
                existing,
            )
            action = "updated"
        else:
            await conn.execute(
                """INSERT INTO company_skills
                   (company_id, key, slug, name, markdown, source_type, file_inventory)
                   VALUES ($1::uuid, $2, $3, $4, $5, 'local_path', $6::jsonb)""",
                SKILLS_COMPANY_ID, slug, slug, slug,
                markdown_content,
                inventory_json,
            )
            action = "installed"
    finally:
        await conn.close()

    return {
        "slug": slug,
        "action": action,
        "files_extracted": len(extracted_files),
        "file_inventory": file_inventory,
        "markdown_chars": len(markdown_content),
    }
@app.post("/api/admin/skills/{slug}/sync")
async def api_sync_skill(slug: str):
    """Sync a skill from disk into the DB (for skills that exist on disk but not in DB).

    Reads ``SKILL.md`` and rebuilds the file inventory from the skill's
    directory, then updates the existing ``company_skills`` row or inserts one.

    Returns:
        ``{"slug", "action", "file_inventory", "markdown_chars"}`` with
        ``action`` being ``"updated"`` or ``"inserted"``.

    Raises:
        HTTPException: 400 for an unsafe slug or a directory without SKILL.md,
            404 when the skill directory does not exist.
    """
    # The slug is joined into a filesystem path; refuse anything that could
    # step outside the company's skills directory.
    if slug in ("", ".", "..") or "/" in slug or "\\" in slug:
        raise HTTPException(400, f"Invalid skill slug: {slug}")

    skill_dir = PAPERCLIP_SKILLS_DIR / SKILLS_COMPANY_ID / slug
    if not skill_dir.exists():
        raise HTTPException(404, f"Skill directory not found on disk: {slug}")
    skill_md_file = skill_dir / "SKILL.md"
    if not skill_md_file.exists():
        raise HTTPException(400, f"No SKILL.md found in {slug}")
    markdown_content = skill_md_file.read_text(encoding="utf-8")

    # Build file_inventory from disk
    file_inventory = []
    for f in sorted(skill_dir.rglob("*")):
        if not f.is_file():
            continue
        rel_parts = f.relative_to(skill_dir).parts
        # Skip top-level hidden entries and macOS metadata at any depth. A
        # substring test for "/__MACOSX/" misses a top-level __MACOSX dir,
        # because the relative path has no leading slash.
        if rel_parts[0].startswith(".") or "__MACOSX" in rel_parts:
            continue
        rel = "/".join(rel_parts)
        if rel == "SKILL.md":
            kind = "skill"
        elif rel.startswith("scripts/"):
            kind = "script"
        elif rel.startswith("references/"):
            kind = "reference"
        elif rel.endswith(".zip"):
            kind = "archive"
        else:
            kind = "resource"
        file_inventory.append({"kind": kind, "path": rel})

    inventory_json = json.dumps(file_inventory, ensure_ascii=False)
    conn = await asyncpg.connect(PAPERCLIP_DB_URL)
    try:
        existing = await conn.fetchval(
            "SELECT id FROM company_skills WHERE company_id = $1::uuid AND slug = $2",
            SKILLS_COMPANY_ID, slug,
        )
        if existing:
            await conn.execute(
                """UPDATE company_skills
                   SET markdown = $1, file_inventory = $2::jsonb, updated_at = now()
                   WHERE id = $3""",
                markdown_content,
                inventory_json,
                existing,
            )
            action = "updated"
        else:
            await conn.execute(
                """INSERT INTO company_skills
                   (company_id, key, slug, name, markdown, source_type, file_inventory)
                   VALUES ($1::uuid, $2, $3, $4, $5, 'local_path', $6::jsonb)""",
                SKILLS_COMPANY_ID, slug, slug, slug,
                markdown_content,
                inventory_json,
            )
            action = "inserted"
    finally:
        await conn.close()

    return {
        "slug": slug,
        "action": action,
        "file_inventory": file_inventory,
        "markdown_chars": len(markdown_content),
    }
@app.delete("/api/admin/skills/{slug}")
async def api_delete_skill(slug: str):
    """Remove a skill's row from the DB; files on disk are left untouched.

    Responds 404 when no row matched the slug for the configured company.
    """
    delete_sql = "DELETE FROM company_skills WHERE company_id = $1::uuid AND slug = $2"
    conn = await asyncpg.connect(PAPERCLIP_DB_URL)
    try:
        status_tag = await conn.execute(delete_sql, SKILLS_COMPANY_ID, slug)
    finally:
        await conn.close()

    # The command status tag ends with the number of affected rows.
    nothing_deleted = status_tag == "DELETE 0"
    if nothing_deleted:
        raise HTTPException(404, f"Skill '{slug}' not found in DB")
    return {"slug": slug, "action": "deleted"}
@app.post("/api/admin/paperclip/restart")
async def api_restart_paperclip():
    """Restart the Paperclip PM2 process.

    Tries pm2 directly (works when running locally on the host).
    In Docker, writes a restart flag file that the host watcher picks up.

    Returns:
        ``{"status": "restarted", "method": "pm2", ...}`` when pm2 succeeded,
        otherwise ``{"status": "restart_requested", "method": "flag_file", ...}``.

    Raises:
        HTTPException: 500 when pm2 is unusable and the flag file cannot be written.
    """
    # Try pm2 directly (works when running outside Docker). Run it in a worker
    # thread so a slow pm2 does not stall the event loop for up to 15 seconds.
    try:
        result = await asyncio.to_thread(
            subprocess.run,
            ["pm2", "restart", "paperclip"],
            capture_output=True, text=True, timeout=15,
        )
    except (OSError, subprocess.TimeoutExpired):
        # pm2 is not installed in the container (FileNotFoundError) or hung.
        # Either way, fall through to the flag file instead of returning a 500.
        logger.warning("pm2 restart unavailable; falling back to flag file", exc_info=True)
        result = None

    if result is not None and result.returncode == 0:
        return {"status": "restarted", "method": "pm2", "output": result.stdout.strip()}

    # Fallback: write a flag file that host-side watcher picks up
    flag_file = PAPERCLIP_SKILLS_DIR / ".restart-requested"
    try:
        flag_file.write_text(str(time.time()))
    except OSError as e:
        raise HTTPException(500, "שגיאה בהפעלת restart. הרץ ידנית: pm2 restart paperclip") from e
    return {
        "status": "restart_requested",
        "method": "flag_file",
        "message": "Restart requested — the host watcher will restart Paperclip shortly.",
    }
@app.post("/api/cases/{case_number}/documents/upload-tagged")
async def api_upload_tagged_document(
    case_number: str,
    file: UploadFile = File(...),
    doc_type: str = Form("auto"),
    party_name: str = Form(""),
    title: str = Form(""),
):
    """Store an uploaded file under the case, register it, and queue processing.

    With ``doc_type="auto"`` the sanitized original name is kept and the record
    is created as ``reference``; otherwise the name is generated from the tag.
    Responds 404 for an unknown case and 400 for a missing name, unsupported
    extension or oversized file. Returns the progress ``task_id`` plus naming info.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    if not file.filename:
        raise HTTPException(400, "No filename provided")

    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {ext}")

    content = await file.read()
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(400, f"קובץ גדול מדי. מקסימום: {MAX_FILE_SIZE // (1024*1024)}MB")

    # Auto classification keeps the (sanitized) original name; tagged uploads
    # get a generated one.
    is_auto = doc_type == "auto"
    if is_auto:
        safe_name = re.sub(r"[^\w\u0590-\u05FF\s.\-()]", "", Path(file.filename).stem).strip()
        new_filename = f"{safe_name or 'document'}{ext}"
    else:
        new_filename = generate_doc_filename(doc_type, case_number, party_name, ext)
    base_stem = new_filename.rsplit(".", 1)[0]

    originals_dir = config.find_case_dir(case_number) / "documents" / "originals"
    originals_dir.mkdir(parents=True, exist_ok=True)

    # On a name clash append -1, -2, … until a free path is found.
    dest = originals_dir / new_filename
    attempt = 1
    while dest.exists():
        dest = originals_dir / f"{base_stem}-{attempt}{ext}"
        attempt += 1
    dest.write_bytes(content)

    case_id = UUID(case["id"])
    doc = await db.create_document(
        case_id=case_id,
        doc_type="reference" if is_auto else doc_type,
        title=title or base_stem.replace("-", " "),
        file_path=str(dest),
    )

    # Hand the heavy lifting to a background task tracked through _progress.
    task_id = str(uuid4())
    await _progress.set(task_id, {"status": "queued", "filename": new_filename})
    asyncio.create_task(
        _process_tagged_document(task_id, dest, case_number, case_id, UUID(doc["id"]), doc_type, new_filename)
    )

    return {
        "task_id": task_id,
        "filename": new_filename,
        "original_name": file.filename,
        "doc_type": doc_type,
    }
async def _process_tagged_document(task_id: str, dest: Path, case_number: str, case_id: UUID, doc_id: UUID, doc_type: str, display_name: str):
    """Process an uploaded tagged document in the background.

    Runs the document processor, then attempts a best-effort git commit/push of
    the case directory, reporting each stage through ``_progress`` under
    ``task_id``. Any processing failure is recorded as status ``failed``; the
    function itself never raises.
    """
    try:
        await _progress.set(task_id, {"status": "processing", "filename": display_name, "step": "extracting"})
        result = await processor.process_document(doc_id, case_id)
        try:
            repo_dir = config.find_case_dir(case_number)
            if repo_dir.exists():
                doc_type_hebrew = DOC_TYPE_NAMES.get(doc_type, doc_type)
                commit_and_push(repo_dir, f"הוספת {doc_type_hebrew}: {display_name}")
        except Exception:
            # Non-critical by design, but keep the traceback so the cause of a
            # failed commit/push can be diagnosed from the logs.
            logger.warning("Git commit/push failed for %s (non-critical)", display_name, exc_info=True)
        await _progress.set(task_id, {
            "status": "completed",
            "filename": display_name,
            "result": result,
            "case_number": case_number,
            "doc_type": doc_type,
        })
    except Exception as e:
        logger.exception("Processing failed for %s", display_name)
        await _progress.set(task_id, {"status": "failed", "error": str(e), "filename": display_name})
@app.post("/api/cases/{case_number}/documents/{doc_id}/reprocess")
async def api_reprocess_document(case_number: str, doc_id: str):
    """Reprocess a failed document.

    Resets the document to ``pending``, deletes its old chunks and starts
    processing in the background.

    Raises:
        HTTPException: 400 for a malformed ``doc_id``, 404 when the case does
            not exist or the document does not belong to it.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])
    # A malformed id is a client error, as in the PATCH endpoint.
    try:
        document_id = UUID(doc_id)
    except ValueError:
        raise HTTPException(400, f"doc_id לא תקין: {doc_id}") from None
    doc = await db.get_document(document_id)
    if not doc or UUID(doc["case_id"]) != case_id:
        raise HTTPException(404, "מסמך לא נמצא בתיק")
    # Reset status and clean old chunks
    await db.update_document(document_id, extraction_status="pending")
    await db.delete_document_chunks(document_id)
    # Process in background
    asyncio.create_task(processor.process_document(document_id, case_id))
    return {"status": "reprocessing"}
# Accepted values for a document's metadata.appraiser_side tag; the PATCH
# endpoint additionally accepts an empty string to clear the tag.
_ALLOWED_APPRAISER_SIDES = {"committee", "appellant", "deciding"}
class DocumentPatchRequest(BaseModel):
    """Patch payload for a single document. Both fields are optional."""

    # None means "leave unchanged" for either field.
    doc_type: str | None = None
    appraiser_side: str | None = None  # committee | appellant | deciding | "" to clear
@app.patch("/api/cases/{case_number}/documents/{doc_id}")
async def api_patch_document(case_number: str, doc_id: str, req: DocumentPatchRequest):
    """Change a document's tags: ``doc_type`` and/or ``metadata.appraiser_side``
    (the flag used by extract_appraiser_facts).

    Fields left as ``None`` are untouched; an empty ``appraiser_side`` clears
    the flag. Returns ``noop`` with the current row when nothing was requested,
    otherwise ``completed`` with the refreshed row. Responds 404 for an unknown
    case/document, 400 for a malformed id and 422 for a disallowed value.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    try:
        document_id = UUID(doc_id)
    except ValueError:
        raise HTTPException(400, f"doc_id לא תקין: {doc_id}")

    doc = await db.get_document(document_id)
    if not doc or UUID(doc["case_id"]) != UUID(case["id"]):
        raise HTTPException(404, "מסמך לא נמצא בתיק")

    changes: dict = {}

    if req.doc_type is not None:
        if req.doc_type not in DOC_TYPE_NAMES:
            allowed_types = ', '.join(sorted(DOC_TYPE_NAMES.keys()))
            raise HTTPException(
                422,
                f"doc_type לא תקין: {req.doc_type}. ערכים מותרים: " + allowed_types,
            )
        changes["doc_type"] = req.doc_type

    side = req.appraiser_side
    if side is not None:
        # "" is allowed and means "clear the flag".
        if side and side not in _ALLOWED_APPRAISER_SIDES:
            allowed_sides = ', '.join(sorted(_ALLOWED_APPRAISER_SIDES))
            raise HTTPException(
                422,
                f"appraiser_side לא תקין: {req.appraiser_side}. ערכים מותרים: " + allowed_sides,
            )
        metadata = doc.get("metadata") or {}
        if isinstance(metadata, str):
            metadata = json.loads(metadata)
        if side:
            metadata["appraiser_side"] = side
        else:
            metadata.pop("appraiser_side", None)
        changes["metadata"] = metadata

    if not changes:
        return {"status": "noop", "document": doc}

    await db.update_document(document_id, **changes)
    refreshed = await db.get_document(document_id)
    return {"status": "completed", "document": refreshed}
@app.post("/api/cases/{case_number}/extract-appraiser-facts")
async def api_extract_appraiser_facts(case_number: str):
    """Queue appraiser-fact extraction by waking the legal-analyst agent.

    The extractor relies on `claude_session.query_json()`, which shells out to
    the local `claude` CLI. That CLI lives on the agent host and is **absent in
    this FastAPI container**, so the work is delegated: a wakeup with
    `mutation: extract_appraiser_facts` is sent to the analyst of the matching
    company, who runs the MCP tool locally and posts the results as a comment.

    Before delegating, the case's appraisal documents are checked with
    `_validate_sides_tagged` (DB-only, no LLM call) so the chair gets immediate
    feedback instead of spinning up an agent for nothing.

    Response shape:
        {"status": "queued", "sub_issue_id", "analyst_id", "main_issue_id"}
        or {"status": "sides_missing", "missing": [...], "message": "..."}
        or {"status": "no_appraisals", ...}
        or {"status": "skipped", "reason": "no_api_key"|"no_analyst"|"no_issue"}
    """
    from legal_mcp.services import appraiser_facts_extractor
    from legal_mcp.services import db as mcp_db

    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

    # Pre-validate without touching Claude so the UI can list untagged
    # appraisals right away.
    all_docs = await mcp_db.list_documents(UUID(case["id"]))
    appraisals = [d for d in all_docs if d.get("doc_type") == "appraisal"]
    if not appraisals:
        return {
            "status": "no_appraisals",
            "appraisal_count": 0,
            "total_facts": 0,
            "conflicts": [],
        }

    untagged = appraiser_facts_extractor._validate_sides_tagged(appraisals)
    if untagged:
        return {
            "status": "sides_missing",
            "appraisal_count": len(appraisals),
            "missing": untagged,
            "message": (
                "חסר תיוג appraiser_side במסמכי שומה. תייג כל שומה דרך ה-UI "
                "(ועדה / עורר / מכריע) והרץ שוב."
            ),
        }

    # Route to the analyst of the correct company by case-number prefix;
    # an unrecognized prefix passes an empty company id through.
    first_digit = case_number[:1]
    if first_digit == "1":
        company_id = PAPERCLIP_COMPANIES["licensing"]
    elif first_digit in ("8", "9"):
        company_id = PAPERCLIP_COMPANIES["betterment"]
    else:
        company_id = ""

    try:
        outcome = await pc_wake_analyst_for_appraiser_facts(
            case_number, company_id=company_id,
        )
    except Exception as e:
        logger.exception("analyst wakeup failed for %s", case_number)
        raise HTTPException(500, f"לא ניתן לשלוח לאנליטיקאי: {e}")
    return outcome
@app.delete("/api/cases/{case_number}/documents/{doc_id}")
async def api_delete_document(case_number: str, doc_id: str):
    """Delete a single document from a case (including its chunks and file).

    Removing the physical file is best-effort: a filesystem error is logged and
    the database record is still deleted.

    Raises:
        HTTPException: 400 for a malformed ``doc_id``, 404 when the case does
            not exist or the document does not belong to it.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    case_id = UUID(case["id"])
    # A malformed id is a client error, as in the PATCH endpoint.
    try:
        document_id = UUID(doc_id)
    except ValueError:
        raise HTTPException(400, f"doc_id לא תקין: {doc_id}") from None
    doc = await db.get_document(document_id)
    if not doc or UUID(doc["case_id"]) != case_id:
        raise HTTPException(404, "מסמך לא נמצא בתיק")

    # Try to remove the physical file. missing_ok covers an already-deleted
    # file; other OS errors must not leave the DB record behind.
    file_path = doc.get("file_path")
    if file_path:
        try:
            Path(file_path).unlink(missing_ok=True)
        except OSError:
            logger.warning(
                "Could not remove file %s for document %s", file_path, doc_id, exc_info=True,
            )

    await db.delete_document(document_id)
    return {"deleted": True, "doc_id": doc_id}
# ── Chair feedback endpoints ──────────────────────────────────────
@app.get("/api/feedback")
async def api_list_feedback(
    case_number: str = "",
    category: str = "",
    unresolved_only: bool = False,
):
    """List chair feedback, optionally filtered by case/category.

    Args:
        case_number: When given, only feedback of that case is returned; an
            unknown case number yields an empty list.
        category: Optional category filter; empty means all categories.
        unresolved_only: When true, resolved entries are excluded.

    Returns:
        A list of feedback dicts, each enriched with its ``case_number``.
    """
    case_id = None
    if case_number:
        case = await db.get_case_by_number(case_number)
        if not case:
            # Do not fall back to the unfiltered list: the caller asked for one
            # specific case and must not be shown other cases' feedback.
            return []
        case_id = UUID(case["id"])

    feedbacks = await db.list_chair_feedback(
        case_id=case_id,
        category=category or None,
        unresolved_only=unresolved_only,
    )

    # case id → case number, resolved once per distinct case (misses included).
    case_numbers: dict[str, str] = {}
    pool = await db.get_pool()
    items = []
    for fb in feedbacks:
        cid = fb.get("case_id")
        cn = ""
        if cid:
            cache_key = str(cid)
            if cache_key not in case_numbers:
                row = await pool.fetchrow("SELECT case_number FROM cases WHERE id = $1", cid)
                case_numbers[cache_key] = row["case_number"] if row else ""
            cn = case_numbers[cache_key]
        items.append({
            "id": str(fb["id"]),
            "case_id": str(cid) if cid else None,
            "case_number": cn,
            "block_id": fb["block_id"],
            "category": fb["category"],
            "feedback_text": fb["feedback_text"],
            "lesson_extracted": fb["lesson_extracted"],
            "resolved": fb["resolved"],
            "applied_to": fb.get("applied_to", []),
            "created_at": fb["created_at"].isoformat() if fb.get("created_at") else None,
        })
    return items
@app.post("/api/feedback")
async def api_create_feedback(
    case_number: str = Form(""),
    block_id: str = Form("block-yod"),
    feedback_text: str = Form(...),
    category: str = Form("missing_content"),
    lesson_extracted: str = Form(""),
):
    """Store a new chair feedback entry submitted as form data.

    The entry is linked to a case only when ``case_number`` resolves to one.
    Responds 400 for a category outside the allowed set; otherwise returns the
    new entry's id.
    """
    # Resolve the optional case link; an unknown number leaves it unlinked.
    case = await db.get_case_by_number(case_number) if case_number else None
    case_id = UUID(case["id"]) if case else None

    allowed_categories = (
        "missing_content", "wrong_tone", "wrong_structure",
        "factual_error", "style", "other",
    )
    if category not in allowed_categories:
        raise HTTPException(400, f"קטגוריה לא חוקית. אפשרויות: {', '.join(allowed_categories)}")

    new_id = await db.record_chair_feedback(
        case_id=case_id,
        block_id=block_id,
        feedback_text=feedback_text,
        category=category,
        lesson_extracted=lesson_extracted,
    )
    return {"id": str(new_id), "status": "created"}
@app.post("/api/feedback/json")
async def api_create_feedback_json(body: dict):
    """Record a new chair feedback entry (JSON body).

    Recognized keys: ``case_number``, ``block_id``, ``feedback_text``
    (required, non-blank), ``category``, ``lesson_extracted``.

    Raises:
        HTTPException: 422 when ``feedback_text`` is missing or blank, 400 for
            a category outside the allowed set.
    """
    # The form-based endpoint declares feedback_text as required; enforce the
    # same here instead of silently storing an empty entry.
    feedback_text = body.get("feedback_text")
    if not isinstance(feedback_text, str) or not feedback_text.strip():
        raise HTTPException(422, "feedback_text חסר או ריק")

    case_number = body.get("case_number", "")
    case_id = None
    if case_number:
        case = await db.get_case_by_number(case_number)
        if case:
            case_id = UUID(case["id"])

    valid_categories = [
        "missing_content", "wrong_tone", "wrong_structure",
        "factual_error", "style", "other",
    ]
    category = body.get("category", "missing_content")
    if category not in valid_categories:
        raise HTTPException(400, f"קטגוריה לא חוקית. אפשרויות: {', '.join(valid_categories)}")

    feedback_id = await db.record_chair_feedback(
        case_id=case_id,
        block_id=body.get("block_id", "block-yod"),
        feedback_text=feedback_text,
        category=category,
        lesson_extracted=body.get("lesson_extracted", ""),
    )
    return {"id": str(feedback_id), "status": "created"}
@app.patch("/api/feedback/{feedback_id}/resolve")
async def api_resolve_feedback(
    feedback_id: str,
    body: dict,
    background_tasks: BackgroundTasks,
):
    """Mark feedback as resolved. When ``fold`` is true (default) and the entry
    has an extracted lesson, also wake the CEO to fold that lesson into the
    right knowledge file (the feedback→agent-knowledge loop).

    The fold is fire-and-forget (BackgroundTask) and best-effort — resolving
    never fails because Paperclip is down. Pass ``fold=false`` for pure
    bookkeeping resolves (e.g. from the per-case drafts panel) to avoid
    spawning a CEO run per click.

    Returns:
        ``{"status": "resolved", "fold_queued": bool}``.

    Raises:
        HTTPException: 400 for a malformed ``feedback_id``, 404 when the entry
            does not exist.
    """
    # A malformed id is a client error, not a server fault.
    try:
        fid = UUID(feedback_id)
    except ValueError:
        raise HTTPException(400, "feedback_id לא תקין") from None
    fold = body.get("fold", True)

    fb = await db.get_chair_feedback(fid)
    if not fb:
        raise HTTPException(404, "הערה לא נמצאה")

    await db.resolve_chair_feedback(
        feedback_id=fid,
        applied_to=body.get("applied_to", []),
    )

    # Guard: only fold a real, lesson-bearing entry, and only when asked.
    lesson = (fb.get("lesson_extracted") or "").strip()
    fold_queued = False
    if fold and lesson:
        async def _fold():
            try:
                await pc_wake_ceo_for_feedback_fold(
                    feedback_id=str(fid),
                    feedback_text=fb.get("feedback_text") or "",
                    lesson_extracted=lesson,
                    category=fb.get("category") or "other",
                    block_id=fb.get("block_id") or "",
                    case_number=fb.get("case_number") or "",
                    practice_area=fb.get("case_appeal_type") or "",
                )
            except Exception:
                logger.exception("feedback-fold wakeup failed (non-fatal) for %s", fid)

        background_tasks.add_task(_fold)
        fold_queued = True

    return {"status": "resolved", "fold_queued": fold_queued}
@app.get("/api/chair-feedback/weekly-summary")
async def api_chair_feedback_weekly_summary(days: int = 7, limit: int = 100):
    """Return chair feedback from the last N days as a text summary for the CEO agent.

    Args:
        days: Look-back window in days; non-positive values yield an empty summary.
        limit: Maximum number of entries; non-positive values yield an empty summary.

    Returns:
        ``{"summary": str, "entry_count": int}`` where ``summary`` holds one
        bullet line per entry, newest first.
    """
    # A negative LIMIT is rejected by the database, so treat every
    # non-positive window or limit as "nothing to report".
    if days <= 0 or limit <= 0:
        return {"summary": "", "entry_count": 0}
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT cf.feedback_text, c.case_number, c.title
            FROM chair_feedback cf
            LEFT JOIN cases c ON c.id = cf.case_id
            WHERE cf.created_at >= now() - make_interval(days => $1)
            ORDER BY cf.created_at DESC
            LIMIT $2
            """,
            days,
            limit,
        )
    if not rows:
        return {"summary": "", "entry_count": 0}
    # The LEFT JOIN leaves case columns NULL for entries without a case.
    lines = [
        f"- תיק {r['case_number'] or ''} ({r['title'] or ''}): {r['feedback_text']}"
        for r in rows
    ]
    return {"summary": "\n".join(lines), "entry_count": len(rows)}
@app.get("/api/chair/pending")
async def api_chair_pending():
    """Approvals hub for the chair.

    Gathers every human gate (INV-G10) that is waiting for the chair's decision
    into one response so that no item is forgotten. Each category carries a
    count, a small sample and a link to the page where it is handled. Every
    count comes from a direct query against the source table (not a cached
    derivative).
    """
    def _clip(value) -> str:
        # Sample texts are shortened to 120 characters.
        return (value or "")[:120]

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        # 1) Halachot awaiting approval (INV-QA1 / G10)
        halachot_total = await conn.fetchval(
            "SELECT count(*) FROM halachot WHERE review_status='pending_review'")
        halachot_oldest = await conn.fetchval(
            "SELECT min(created_at) FROM halachot WHERE review_status='pending_review'")
        halachot_rows = await conn.fetch(
            "SELECT h.rule_statement, coalesce(cl.case_name,'') AS case_name "
            "FROM halachot h LEFT JOIN case_law cl ON cl.id=h.case_law_id "
            "WHERE h.review_status='pending_review' ORDER BY h.created_at ASC LIMIT 5")
        # 2) Case law missing from the corpus (open)
        missing_total = await conn.fetchval(
            "SELECT count(*) FROM missing_precedents WHERE status='open'")
        missing_rows = await conn.fetch(
            "SELECT coalesce(citation,'') AS cite, coalesce(legal_topic, case_name, '') AS topic "
            "FROM missing_precedents WHERE status='open' ORDER BY created_at DESC LIMIT 5")
        # 3) Chair feedback not yet applied
        feedback_total = await conn.fetchval("SELECT count(*) FROM chair_feedback WHERE NOT resolved")
        feedback_rows = await conn.fetch(
            "SELECT cf.feedback_text, coalesce(c.case_number,'') AS case_number "
            "FROM chair_feedback cf LEFT JOIN cases c ON c.id=cf.case_id "
            "WHERE NOT cf.resolved ORDER BY cf.created_at DESC LIMIT 5")
        # 4) Cases that failed QA
        failed_cases = await conn.fetch(
            "SELECT case_number, coalesce(title,'') AS title FROM cases WHERE status='qa_failed' ORDER BY updated_at DESC")

    # A single failed case links straight to it; several link to the home
    # dashboard (the donut/table surface them). Sample rows link to their case.
    if len(failed_cases) == 1:
        failed_href = f"/cases/{failed_cases[0]['case_number']}"
    else:
        failed_href = "/"

    categories: list[dict] = [
        {
            "key": "halachot", "label": "הלכות הממתינות לאישור",
            "description": "הלכות שחולצו אוטומטית מפסיקה — נראות בחיפוש רק לאחר אישורך.",
            "count": halachot_total, "severity": "high" if halachot_total else "ok",
            "href": "/precedents",
            "oldest_at": halachot_oldest.isoformat() if halachot_oldest else None,
            "sample": [
                {"text": _clip(row["rule_statement"]), "source": row["case_name"]}
                for row in halachot_rows
            ],
        },
        {
            "key": "missing_precedents", "label": "פסיקה חסרה בקורפוס",
            "description": "ציטוטים מכתבי-טענות שעדיין אינם בקורפוס — להעלאה/סגירה.",
            "count": missing_total, "severity": "medium" if missing_total else "ok",
            "href": "/missing-precedents",
            "sample": [
                {"text": _clip(row["cite"]), "source": row["topic"]}
                for row in missing_rows
            ],
        },
        {
            "key": "chair_feedback", "label": "הערות יו\"ר שטרם יושמו",
            "description": "הערות שרשמת על טיוטות וטרם הופקו מהן לקחים/תיקונים.",
            "count": feedback_total, "severity": "medium" if feedback_total else "ok",
            "href": "/feedback",
            "sample": [
                {"text": _clip(row["feedback_text"]), "source": row["case_number"]}
                for row in feedback_rows
            ],
        },
        {
            "key": "qa_failed", "label": "תיקים שנכשלו ב-QA",
            "description": "תיקים שבדיקת-האיכות חסמה — דורשים התייחסותך לפני המשך.",
            "count": len(failed_cases), "severity": "high" if failed_cases else "ok",
            "href": failed_href,
            "sample": [
                {"text": row["case_number"], "source": row["title"],
                 "href": f"/cases/{row['case_number']}"}
                for row in failed_cases[:5]
            ],
        },
    ]

    return {
        "total_pending": sum(entry["count"] for entry in categories),
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "categories": categories,
    }
# ── Background Processing ─────────────────────────────────────────
async def _process_file(task_id: str, source: Path, req: ClassifyRequest):
    """Background entry point: dispatch a classified upload to its pipeline.

    Files classified as ``"case"`` go through the case-document pipeline; every
    other category is treated as a training document. Any exception is logged
    and published as a ``failed`` progress record instead of propagating.
    """
    try:
        is_case_file = req.category == "case"
        pipeline = _process_case_document if is_case_file else _process_training_document
        await pipeline(task_id, source, req)
    except Exception as exc:
        logger.exception("Processing failed for %s", req.filename)
        failure = {"status": "failed", "error": str(exc), "filename": req.filename}
        await _progress.set(task_id, failure)
async def _process_case_document(task_id: str, source: Path, req: ClassifyRequest):
    """Process a case document (mirrors documents.document_upload logic).

    Steps, each published to the progress store under ``task_id``: validate
    that the case exists, copy the upload into the case's
    ``documents/originals`` directory, register a document record, run the
    processor on it, commit the case repository (best-effort), and remove the
    staged upload.

    Args:
        task_id: Key under which progress records are published.
        source: Staged upload; its name may carry a ``<digits>_`` prefix.
        req: Classification request (case number, doc type, optional title).
    """
    await _progress.set(task_id, {"status": "validating", "filename": req.filename})
    case = await db.get_case_by_number(req.case_number)
    if not case:
        # Include the filename like every other progress record for this task.
        await _progress.set(task_id, {
            "status": "failed",
            "error": f"Case {req.case_number} not found",
            "filename": req.filename,
        })
        return
    case_id = UUID(case["id"])
    # Strip only a leading "<digits>_" prefix. Splitting on the first
    # underscore would also eat real text from names without such a prefix
    # ("my_file" -> "file"), and would disagree with original_name below.
    title = req.title or re.sub(r"^\d+_", "", source.stem)

    # Copy to case directory
    await _progress.set(task_id, {"status": "copying", "filename": req.filename})
    case_root = config.find_case_dir(req.case_number)
    originals_dir = case_root / "documents" / "originals"
    originals_dir.mkdir(parents=True, exist_ok=True)
    # Use original name without timestamp prefix
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = originals_dir / original_name
    shutil.copy2(str(source), str(dest))

    # Create document record
    await _progress.set(task_id, {"status": "registering", "filename": req.filename})
    doc = await db.create_document(
        case_id=case_id,
        doc_type=req.doc_type,
        title=title,
        file_path=str(dest),
    )

    # Process (extract → chunk → embed → store)
    await _progress.set(task_id, {"status": "processing", "filename": req.filename, "step": "extracting"})
    result = await processor.process_document(UUID(doc["id"]), case_id)

    # Git commit (best-effort)
    try:
        if case_root.exists():
            doc_type_hebrew = {
                "appeal": "כתב ערר", "response": "תשובה", "decision": "החלטה",
                "reference": "מסמך עזר", "exhibit": "נספח",
            }.get(req.doc_type, req.doc_type)
            commit_and_push(case_root, f"הוספת {doc_type_hebrew}: {title}")
    except Exception:
        # Keep the traceback so a recurring git failure can be diagnosed.
        logger.warning(
            "Git commit/push failed for %s (non-critical)", req.filename, exc_info=True
        )

    # Remove from uploads
    source.unlink(missing_ok=True)
    await _progress.set(task_id, {
        "status": "completed",
        "filename": req.filename,
        "result": result,
        "case_number": req.case_number,
        "doc_type": req.doc_type,
    })
async def _process_training_document(task_id: str, source: Path, req: ClassifyRequest):
    """Process a training document (mirrors documents.document_upload_training logic).

    Copies the upload into the training directory, extracts its text, adds it
    to the style corpus, then chunks and embeds it; when chunks exist, a
    document record is created and the chunks are stored against it. Progress
    is published under ``task_id`` and the staged upload is removed at the end.

    Args:
        task_id: Key under which progress records are published.
        source: Staged upload; its name may carry a ``<digits>_`` prefix.
        req: Classification request (decision number/date, categories, title).
    """
    from datetime import date as date_type

    # Strip only a leading "<digits>_" prefix. Splitting on the first
    # underscore would also eat real text from names without such a prefix
    # ("my_file" -> "file"), and would disagree with original_name below.
    title = req.title or re.sub(r"^\d+_", "", source.stem)

    # Copy to training directory
    await _progress.set(task_id, {"status": "copying", "filename": req.filename})
    config.TRAINING_DIR.mkdir(parents=True, exist_ok=True)
    original_name = re.sub(r"^\d+_", "", source.name)
    dest = config.TRAINING_DIR / original_name
    shutil.copy2(str(source), str(dest))

    # Extract text
    await _progress.set(task_id, {"status": "processing", "filename": req.filename, "step": "extracting"})
    text, page_count, _ = await extractor.extract_text(str(dest))

    # Parse date (ISO format; a malformed value raises ValueError to the caller)
    d_date = None
    if req.decision_date:
        d_date = date_type.fromisoformat(req.decision_date)

    # Add to style corpus
    await _progress.set(task_id, {"status": "processing", "filename": req.filename, "step": "corpus"})
    corpus_id = await db.add_to_style_corpus(
        document_id=None,
        decision_number=req.decision_number,
        decision_date=d_date,
        subject_categories=req.subject_categories,
        full_text=text,
    )

    # Chunk and embed
    await _progress.set(task_id, {"status": "processing", "filename": req.filename, "step": "chunking"})
    chunks = chunker.chunk_document(text)
    chunk_count = 0
    if chunks:
        doc = await db.create_document(
            case_id=None,
            doc_type="decision",
            title=f"[קורפוס] {title}",
            file_path=str(dest),
            page_count=page_count,
        )
        doc_id = UUID(doc["id"])
        await db.update_document(doc_id, extracted_text=text, extraction_status="completed")
        await _progress.set(task_id, {"status": "processing", "filename": req.filename, "step": "embedding"})
        texts = [c.content for c in chunks]
        embs = await embeddings.embed_texts(texts, input_type="document")
        chunk_dicts = [
            {
                "content": c.content,
                "section_type": c.section_type,
                "embedding": emb,
                "page_number": c.page_number,
                "chunk_index": c.chunk_index,
            }
            for c, emb in zip(chunks, embs)
        ]
        await db.store_chunks(doc_id, None, chunk_dicts)
        # Report what was actually stored: zip() stops at the shorter input,
        # so this can be lower than len(chunks) if embeddings came back short.
        chunk_count = len(chunk_dicts)

    # Remove from uploads
    source.unlink(missing_ok=True)
    await _progress.set(task_id, {
        "status": "completed",
        "filename": req.filename,
        "result": {
            "corpus_id": str(corpus_id),
            "title": title,
            "pages": page_count,
            "text_length": len(text),
            "chunks": chunk_count,
        },
    })
# ── External Precedent Library ────────────────────────────────────
# Chair-uploaded court rulings + appeals committee decisions, with
# automatic halacha extraction. Distinct from /api/training (style
# corpus) and /api/cases/{n}/precedents (chair-attached quotes).
from legal_mcp.services import precedent_library as plib_service # noqa: E402
from legal_mcp.services.precedent_metadata_extractor import ( # noqa: E402
PLACEHOLDER_PENDING_EXTRACTION,
)
# practice_area values accepted by the precedent endpoints below; the empty
# string is accepted too (it is the upload form's default).
_PRACTICE_AREAS = {"", "rishuy_uvniya", "betterment_levy", "compensation_197"}
# source_type values accepted by the precedent-library upload; the empty
# string is accepted too (it is the upload form's default).
_SOURCE_TYPES = {"", "court_ruling", "appeals_committee"}
def _make_progress_publisher(task_id: str, filename: str):
    """Return an async ``publish(status, percent, message)`` callback.

    Each call writes one progress record for ``task_id`` to the progress store.
    The fine-grained ingestion status is kept under ``stage``; the top-level
    ``status`` is collapsed to ``"processing"`` unless it is one of the
    terminal values ``"completed"`` / ``"failed"``.
    """
    terminal_statuses = ("completed", "failed")

    async def publish(status: str, percent: int, message: str) -> None:
        coarse_status = status if status in terminal_statuses else "processing"
        record = {
            "status": coarse_status,
            "stage": status,
            "filename": filename,
            "step": message,
            "percent": percent,
        }
        await _progress.set(task_id, record)

    return publish
class PrecedentUpdateRequest(BaseModel):
    # Body of PATCH /api/precedent-library/{case_law_id}. Every field is
    # optional and defaults to None; the handler forwards only fields that
    # were sent with a non-None value.
    #
    # case_number is the canonical identifier (e.g. "8027-25"). It is editable
    # so a wrong identifier captured at upload (e.g. the full citation pasted
    # into the field) can be corrected from the edit screen. update_case_law
    # already whitelists it.
    case_number: str | None = None
    case_name: str | None = None
    court: str | None = None
    # ISO date string; the handler parses its first 10 characters.
    decision_date: str | None = None
    # Validated by the handler against _PRACTICE_AREAS.
    practice_area: str | None = None
    appeal_subtype: str | None = None
    subject_tags: list[str] | None = None
    summary: str | None = None
    headnote: str | None = None
    key_quote: str | None = None
    source_url: str | None = None
    source_type: str | None = None
    precedent_level: str | None = None
    is_binding: bool | None = None
    district: str | None = None
    chair_name: str | None = None
    citation_formatted: str | None = None
class HalachaUpdateRequest(BaseModel):
    # Request body for updating a single halacha; every field is optional.
    review_status: str | None = None
    # Reviewer name; defaults to "דפנה" when the client omits it.
    reviewer: str | None = "דפנה"
    rule_statement: str | None = None
    reasoning_summary: str | None = None
    subject_tags: list[str] | None = None
    practice_areas: list[str] | None = None
class HalachaBatchReviewRequest(BaseModel):
    """#84 — apply one review status to many halachot at once (group action)."""
    # Ids of the halachot to update, as strings.
    halacha_ids: list[str]
    # The single review status applied to every listed halacha.
    review_status: str
    # Reviewer name; defaults to "דפנה" when the client omits it.
    reviewer: str | None = "דפנה"
# Strong references to in-flight ingestion tasks. The event loop only keeps
# weak references to tasks, so a fire-and-forget task could otherwise be
# garbage-collected before it finishes.
_plib_upload_tasks: set = set()


@app.post("/api/precedent-library/upload")
async def precedent_library_upload(
    file: UploadFile = File(...),
    citation: str = Form(...),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    source_type: str = Form(""),
    precedent_level: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),  # JSON array string
    is_binding: bool = Form(True),
    headnote: str = Form(""),
    summary: str = Form(""),
):
    """Upload a court ruling / appeals committee decision to the
    authoritative precedent library. Halachot are extracted in the
    background and queued for chair approval.

    The file is staged under ``UPLOAD_DIR`` and ingested by a background task;
    the response only carries the ``task_id`` under which progress is published.

    Raises:
        HTTPException: 400 for an invalid ``practice_area`` / ``source_type``,
            a blank ``citation`` or an unsupported file extension; 409 when the
            citation was already uploaded; 413 when the file exceeds
            ``MAX_FILE_SIZE``.

    Returns:
        ``{"task_id": <progress key>}``.
    """
    if practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if source_type not in _SOURCE_TYPES:
        raise HTTPException(400, "source_type לא תקין")
    if not citation.strip():
        raise HTTPException(400, "citation חובה")

    # Reject re-upload of an already-manually-ingested citation so the
    # chair can consciously choose between deletion and re-extraction.
    existing = await db.get_external_case_law_by_citation(citation.strip())
    if existing:
        raise HTTPException(409, detail={
            "error": "duplicate_external_upload",
            "case_law_id": str(existing["id"]),
            "citation": existing.get("case_number") or citation.strip(),
            "case_name": existing.get("case_name") or "",
            "court": existing.get("court") or "",
            "date": (existing["date"].isoformat()
                     if existing.get("date") else None),
            "halacha_extraction_status": existing.get("halacha_extraction_status") or "",
        })

    display_name = file.filename or ""
    # The filename is client-controlled: keep only its final path component so
    # a name such as "../../x.pdf" cannot place the staged file outside
    # UPLOAD_DIR.
    safe_name = Path(display_name).name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"plib_{uuid4().hex[:8]}_{safe_name}"
    size = 0
    try:
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)
    except Exception:
        # Oversize or I/O failure: never leave a partial file behind.
        staged.unlink(missing_ok=True)
        raise

    # Malformed or non-list subject_tags degrade to "no tags".
    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    task_id = str(uuid4())
    await _progress.set(task_id, {
        "status": "queued", "filename": display_name,
        "stage": "queued", "percent": 0,
    })

    publish = _make_progress_publisher(task_id, display_name)

    async def _run():
        try:
            result = await plib_service.ingest_precedent(
                file_path=staged,
                citation=citation.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                source_type=source_type,
                precedent_level=precedent_level,
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                headnote=headnote.strip(),
                summary=summary.strip(),
                progress=publish,
            )
            # Auto-trigger Claude (via Paperclip) to extract halachot+metadata.
            # Best-effort — failures are logged but don't surface to the user;
            # `precedent_process_pending` can always be run manually.
            case_law_id = result.get("case_law_id") if isinstance(result, dict) else None
            if case_law_id:
                try:
                    wake = await pc_wake_for_precedent_extraction(
                        case_law_id=case_law_id,
                        citation=citation.strip(),
                        practice_area=practice_area,
                    )
                    # The wake helper returns {ok: False, ...} on a skipped /
                    # failed wakeup WITHOUT raising — previously that path was
                    # silently dropped and the precedent sat at 'pending'
                    # forever. Surface it as a WARNING with the reason.
                    if not wake.get("ok"):
                        logger.warning(
                            "precedent %s: extraction wakeup did not queue (%s) — "
                            "halachot stay pending until precedent_process_pending runs",
                            case_law_id, wake.get("skipped") or wake.get("error"),
                        )
                except Exception:
                    logger.exception("precedent-extraction wakeup failed (non-fatal)")
        except Exception as e:
            logger.exception("precedent-library upload failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e),
                "filename": display_name,
            })
        finally:
            staged.unlink(missing_ok=True)

    task = asyncio.create_task(_run())
    _plib_upload_tasks.add(task)
    task.add_done_callback(_plib_upload_tasks.discard)
    return {"task_id": task_id}
@app.get("/api/precedent-library")
async def precedent_library_list(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    source_kind: str = "external_upload",
    limit: int = 100,
    offset: int = 0,
):
    """List precedent-library records with optional filters and paging.

    All filters are passed through to ``db.list_external_case_law``. The
    returned ``count`` is the number of items in this page.
    """
    filters = {
        "practice_area": practice_area,
        "court": court,
        "precedent_level": precedent_level,
        "source_type": source_type,
        "search": search,
        "source_kind": source_kind,
    }
    page = await db.list_external_case_law(**filters, limit=limit, offset=offset)
    return {"items": page, "count": len(page)}
@app.get("/api/precedent-library/stats")
async def precedent_library_stats():
    """Return the precedent-library statistics produced by the db layer."""
    stats = await db.precedent_library_stats()
    return stats
@app.get("/api/precedent-library/search")
async def precedent_library_search(
    q: str,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
):
    """Search the precedent library.

    Queries shorter than two characters (after trimming) return an empty
    result without calling the search service.
    """
    query = q.strip() if q else ""
    if len(query) < 2:
        return {"items": [], "count": 0}

    hits = await plib_service.search_library(
        query=query,
        practice_area=practice_area,
        court=court,
        precedent_level=precedent_level,
        appeal_subtype=appeal_subtype,
        subject_tag=subject_tag,
        limit=limit,
        include_halachot=include_halachot,
    )
    return {"items": hits, "count": len(hits)}
@app.get("/api/precedent-library/{case_law_id}")
async def precedent_library_get(case_law_id: str):
    """Fetch one precedent by id.

    Raises:
        HTTPException: 400 when the id is not a valid UUID, 404 when no
            precedent with that id exists.
    """
    try:
        precedent_uuid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    found = await plib_service.get_precedent(precedent_uuid)
    if found:
        return found
    raise HTTPException(404, "פסיקה לא נמצאה")
@app.patch("/api/precedent-library/{case_law_id}")
async def precedent_library_update(case_law_id: str, req: PrecedentUpdateRequest):
    """Partially update a precedent record.

    Only fields that were sent with a non-None value are forwarded to
    ``db.update_case_law``. ``decision_date`` is never forwarded under that
    name: a non-empty value is parsed (first 10 characters, ISO format) and
    passed as ``date``; an empty value is dropped.

    Raises:
        HTTPException: 400 for a malformed id, an unknown ``practice_area`` or
            an unparsable ``decision_date``; 404 when the record is not found.
    """
    try:
        cid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")
    fields = {k: v for k, v in req.model_dump(exclude_unset=True).items() if v is not None}
    if "practice_area" in fields and fields["practice_area"] not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    # Always remove decision_date from the update set: previously an empty
    # string stayed in ``fields`` and was handed to update_case_law as a stray
    # ``decision_date`` keyword.
    raw_date = fields.pop("decision_date", None)
    if raw_date:
        try:
            from datetime import date as date_type
            fields["date"] = date_type.fromisoformat(raw_date[:10])
        except ValueError:
            raise HTTPException(400, "decision_date לא תקין")
    record = await db.update_case_law(cid, **fields)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")
    return record
@app.delete("/api/precedent-library/{case_law_id}")
async def precedent_library_delete(case_law_id: str):
    """Delete one precedent by id.

    Raises:
        HTTPException: 400 when the id is not a valid UUID, 404 when the
            delete call reports that nothing was removed.
    """
    try:
        precedent_uuid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    removed = await plib_service.delete_precedent(precedent_uuid)
    if removed:
        return {"deleted": True, "case_law_id": case_law_id}
    raise HTTPException(404, "פסיקה לא נמצאה")
class PrecedentRelationRequest(BaseModel):
    # Body of POST /api/precedent-library/{case_law_id}/relations.
    # Id of the other precedent; the handler parses it as a UUID.
    related_id: str
    # Kind of link; defaults to "same_case_chain" when omitted.
    relation_type: str = "same_case_chain"
@app.post("/api/precedent-library/{case_law_id}/relations")
async def precedent_add_relation(case_law_id: str, req: PrecedentRelationRequest):
    """Link two existing precedents with the requested relation type.

    Raises:
        HTTPException: 400 when either id is not a valid UUID, 404 when either
            precedent does not exist.
    """
    try:
        source_id = UUID(case_law_id)
        target_id = UUID(req.related_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    # Both ends must exist before the link is written.
    source_record = await db.get_case_law(source_id)
    if not source_record:
        raise HTTPException(404, "פסיקה לא נמצאה")
    target_record = await db.get_case_law(target_id)
    if not target_record:
        raise HTTPException(404, f"פסיקה קשורה {req.related_id} לא נמצאה")

    await db.add_case_law_relation(source_id, target_id, req.relation_type)
    return {"linked": True, "case_law_id": case_law_id, "related_id": req.related_id}
@app.delete("/api/precedent-library/{case_law_id}/relations/{related_id}")
async def precedent_remove_relation(case_law_id: str, related_id: str):
    """Remove the link between two precedents.

    Raises:
        HTTPException: 400 when either id is not a valid UUID.
    """
    try:
        source_id = UUID(case_law_id)
        target_id = UUID(related_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    await db.remove_case_law_relation(source_id, target_id)
    response = {"unlinked": True, "case_law_id": case_law_id, "related_id": related_id}
    return response
# Halacha and metadata extraction are LLM-driven and rely on the local
# `claude` CLI via mcp-server/services/claude_session.py — they CANNOT run
# from this container (no CLI, no claude.ai session). The endpoints below
# DON'T run extraction; they only stamp a request in the queue. The
# corresponding MCP tools (`precedent_process_pending_metadata`,
# `precedent_process_pending_halachot`), invoked from local Claude Code,
# drain the queue.
async def _wake_ceo_for_precedent(case_law_id: UUID, kind: str) -> dict:
    """Ask the Paperclip CEO to drain the precedent extraction queue.

    Uses the same wakeup helper as the upload flow
    (``pc_wake_for_precedent_extraction``). Best-effort: a failure is logged
    and reported in the returned dict, never raised, so the caller can still
    answer the user, who may run ``mcp__legal-ai__precedent_process_pending``
    manually.

    Args:
        case_law_id: The precedent whose extraction was requested.
        kind: Label of the request; used only in the failure log line.

    Returns:
        The helper's outcome dict, ``{"ok": False, "skipped":
        "record_missing"}`` when the precedent does not exist, or
        ``{"ok": False, "error": "wakeup_failed"}`` when the helper raised.
    """
    precedent = await db.get_case_law(case_law_id)
    if not precedent:
        return {"ok": False, "skipped": "record_missing"}

    citation = str(precedent.get("case_number") or "")
    area = str(precedent.get("practice_area") or "")
    try:
        outcome = await pc_wake_for_precedent_extraction(
            case_law_id=str(case_law_id),
            citation=citation,
            practice_area=area,
        )
    except Exception:
        logger.exception("precedent-extraction wakeup failed (non-fatal, kind=%s)", kind)
        return {"ok": False, "error": "wakeup_failed"}
    return outcome
@app.post("/api/precedent-library/{case_law_id}/request-metadata")
async def precedent_request_metadata(case_law_id: str):
    """Queue metadata extraction for a precedent and wake the Paperclip CEO.

    The request is stamped on the case_law row first; the wakeup outcome is
    returned under ``wakeup`` and never fails the request.

    Raises:
        HTTPException: 400 for a malformed id, 404 when the precedent is not found.
    """
    try:
        precedent_uuid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    stamped = await db.request_metadata_extraction(precedent_uuid)
    if not stamped:
        raise HTTPException(404, "פסיקה לא נמצאה")

    wake_outcome = await _wake_ceo_for_precedent(precedent_uuid, kind="metadata")
    return {
        "queued": True,
        "case_law_id": case_law_id,
        "kind": "metadata",
        "wakeup": wake_outcome,
    }
@app.post("/api/precedent-library/{case_law_id}/request-halachot")
async def precedent_request_halachot(case_law_id: str):
    """Queue halacha re-extraction for a precedent and wake the Paperclip CEO.

    The request is stamped on the case_law row first; the wakeup outcome is
    returned under ``wakeup`` and never fails the request.

    Raises:
        HTTPException: 400 for a malformed id, 404 when the precedent is not found.
    """
    try:
        precedent_uuid = UUID(case_law_id)
    except ValueError:
        raise HTTPException(400, "case_law_id לא תקין")

    stamped = await db.request_halacha_extraction(precedent_uuid)
    if not stamped:
        raise HTTPException(404, "פסיקה לא נמצאה")

    wake_outcome = await _wake_ceo_for_precedent(precedent_uuid, kind="halacha")
    return {
        "queued": True,
        "case_law_id": case_law_id,
        "kind": "halacha",
        "wakeup": wake_outcome,
    }
@app.get("/api/precedent-library/queue/pending")
async def precedent_queue_pending(kind: str = "metadata", limit: int = 20):
    """Read-only view of the pending extraction queue.

    The MCP worker reads the same queue; the UI calls this endpoint to show
    the 'X ממתינות לעיבוד מקומי' badges.

    Raises:
        HTTPException: 400 when ``kind`` is neither ``metadata`` nor ``halacha``.
    """
    known_kinds = ("metadata", "halacha")
    if kind not in known_kinds:
        raise HTTPException(400, "kind חייב להיות metadata או halacha")

    pending = await db.list_pending_extraction_requests(kind=kind, limit=limit)
    return {"items": pending, "count": len(pending)}
from legal_mcp.services import internal_decisions as int_decisions_service # noqa: E402
# Strong references to in-flight ingestion tasks. The event loop only keeps
# weak references to tasks, so a fire-and-forget task could otherwise be
# garbage-collected before it finishes.
_internal_upload_tasks: set = set()


@app.post("/api/internal-decisions/upload")
async def internal_decisions_upload(
    file: UploadFile = File(...),
    case_number: str = Form(...),
    case_name: str = Form(""),
    citation: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    chair_name: str = Form(""),
    district: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),
    is_binding: bool = Form(True),
    summary: str = Form(""),
):
    """Upload a planning appeals-committee decision to the internal corpus.

    ``case_number`` is the canonical identifier (e.g. "8027-25"); ``citation``
    is the full מראה-מקום (e.g. "ערר ... 8027/25 פלוני נ' הוועדה ..."). They
    are distinct fields — previously the UI sent the citation as case_number,
    leaving the identifier polluted and citation_formatted empty until the
    metadata extractor ran. citation is stored as citation_formatted up-front
    so it survives even if extraction is delayed.

    The file is staged under ``UPLOAD_DIR`` and ingested by a background task.
    The final progress record carries ``extraction_queued``, which is True
    only when the extraction wakeup was actually accepted.

    Raises:
        HTTPException: 400 for an invalid ``practice_area``, a blank
            ``case_number`` or an unsupported file extension; 413 when the
            file exceeds ``MAX_FILE_SIZE``.

    Returns:
        ``{"task_id": <progress key>}``.
    """
    if practice_area and practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if not case_number.strip():
        raise HTTPException(400, "case_number חובה")

    display_name = file.filename or ""
    # The filename is client-controlled: keep only its final path component so
    # a name such as "../../x.pdf" cannot place the staged file outside
    # UPLOAD_DIR.
    safe_name = Path(display_name).name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"int_{uuid4().hex[:8]}_{safe_name}"
    size = 0
    try:
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)
    except Exception:
        # Oversize or I/O failure: never leave a partial file behind.
        staged.unlink(missing_ok=True)
        raise

    # Malformed or non-list subject_tags degrade to "no tags".
    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    task_id = str(uuid4())
    await _progress.set(task_id, {
        "status": "queued", "filename": display_name,
        "stage": "queued", "percent": 0,
    })

    async def _run():
        try:
            result = await int_decisions_service.ingest_internal_decision(
                case_number=case_number.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                chair_name=chair_name.strip(),
                district=district.strip(),
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                summary=summary.strip(),
                file_path=staged,
            )
            case_law_id = result.get("case_law_id") if isinstance(result, dict) else None

            # Persist the מראה-מקום the chair typed, up-front. The metadata
            # extractor only fills citation_formatted when it is empty, so this
            # preserves the user's exact citation rather than waiting on (or
            # being overwritten by) extraction.
            if case_law_id and citation.strip():
                try:
                    await db.update_case_law(
                        UUID(case_law_id), citation_formatted=citation.strip()
                    )
                except Exception:
                    logger.warning(
                        "internal-decision %s: storing citation_formatted failed",
                        case_number, exc_info=True,
                    )

            # Auto-trigger halacha+metadata extraction via the CEO. The
            # extractor needs the local `claude` CLI (absent in this
            # container), so the only automatic path is a Paperclip wakeup.
            # Without this, committee decisions stay stuck forever at
            # halacha_extraction_status='pending'.
            #
            # Starts False and flips only on an accepted wakeup, so a missing
            # case_law_id is not reported as "queued".
            extraction_queued = False
            if case_law_id:
                # Route to the correct company CEO. _get_company_id keys off
                # practice_area; committee case numbers are reliably prefixed
                # (1xxx→רישוי, 8xxx→היטל, 9xxx→פיצויים), so derive a routing
                # tag from the prefix when practice_area is empty — otherwise
                # an 8xxx case wrongly routes to the licensing CEO.
                routing_pa = practice_area or {
                    "1": "rishuy_uvniya", "8": "betterment_levy", "9": "compensation_197",
                }.get(case_number.strip()[:1], "")
                try:
                    wake = await pc_wake_for_precedent_extraction(
                        case_law_id=case_law_id,
                        citation=case_number.strip(),
                        practice_area=routing_pa,
                    )
                except Exception:
                    # The decision is already ingested; a wakeup failure must
                    # not turn the whole upload into "failed".
                    logger.exception("precedent-extraction wakeup failed (non-fatal)")
                else:
                    if wake.get("ok"):
                        extraction_queued = True
                    else:
                        logger.warning(
                            "internal-decision %s: extraction wakeup did not queue (%s) — "
                            "halachot stay pending until precedent_process_pending runs",
                            case_number, wake.get("skipped") or wake.get("error"),
                        )
            await _progress.set(task_id, {
                "status": "completed", "percent": 100,
                "filename": display_name,
                "extraction_queued": extraction_queued,
            })
        except Exception as e:
            logger.exception("internal-decisions upload failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e),
                "filename": display_name,
            })
        finally:
            staged.unlink(missing_ok=True)

    task = asyncio.create_task(_run())
    _internal_upload_tasks.add(task)
    task.add_done_callback(_internal_upload_tasks.discard)
    return {"task_id": task_id}
@app.post("/api/internal-decisions/migrate")
async def internal_decisions_migrate(
    source: str = "both",
    dry_run: bool = True,
):
    """Migrate existing data to the internal committee corpus.

    Args:
        source: ``'style_corpus'``, ``'external_corpus'`` or ``'both'``.
        dry_run: When true, only report what would be done (no writes).

    Raises:
        HTTPException: 400 when ``source`` is not one of the three values.

    Returns:
        A dict with one entry per migrated source, keyed by source name.
    """
    if source not in ("style_corpus", "external_corpus", "both"):
        raise HTTPException(400, "source חייב להיות style_corpus / external_corpus / both")

    report: dict = {}
    if source != "external_corpus":
        report["style_corpus"] = await int_decisions_service.migrate_from_style_corpus(dry_run=dry_run)
    if source != "style_corpus":
        report["external_corpus"] = await int_decisions_service.migrate_from_external_corpus(dry_run=dry_run)
    return report
@app.get("/api/internal-decisions")
async def internal_decisions_list(
    district: str = "",
    chair_name: str = "",
    practice_area: str = "",
    limit: int = 100,
):
    """List internal committee decisions, optionally filtered.

    Non-empty filters are matched by equality and bound as query parameters.
    ``total`` counts every internal-committee row, regardless of the filters.
    """
    clauses = ["source_kind = 'internal_committee'"]
    args: list = []
    # Column names are fixed here; only the values come from the request.
    for column, value in (
        ("district", district),
        ("chair_name", chair_name),
        ("practice_area", practice_area),
    ):
        if value:
            args.append(value)
            clauses.append(f"{column} = ${len(args)}")
    args.append(limit)
    limit_slot = len(args)
    where = " AND ".join(clauses)
    query = (
        f"SELECT id, case_number, case_name, court, district, chair_name, "
        f"date, practice_area, appeal_subtype, extraction_status, halacha_extraction_status "
        f"FROM case_law WHERE {where} ORDER BY date DESC NULLS LAST LIMIT ${limit_slot}"
    )

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *args)
        overall = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'internal_committee'"
        )
    return {
        "total": overall,
        "items": [dict(record) for record in records],
    }
@app.get("/api/halachot")
async def halachot_list(
    case_law_id: str = "",
    review_status: str = "",
    practice_area: str = "",
    limit: int = 200,
    offset: int = 0,
    exclude_low_quality: bool = False,
    order_by_priority: bool = False,
    cluster: bool = False,
    include_equivalents: bool = False,
):
    """List halachot with optional filters and review-queue options.

    ``exclude_low_quality`` hides flagged items (#84.1), ``order_by_priority``
    switches to the active-learning order (#84.3), ``cluster`` annotates
    near-duplicate groups for one-card review (#84.2) and
    ``include_equivalents`` attaches cross-precedent parallel-authority links.
    All four default to off so existing callers are unaffected; the review
    queue opts in.

    Raises:
        HTTPException: 400 when ``case_law_id`` is given but is not a valid UUID.
    """
    precedent_filter: UUID | None = None
    if case_law_id:
        try:
            precedent_filter = UUID(case_law_id)
        except ValueError:
            raise HTTPException(400, "case_law_id לא תקין")

    # Empty-string filters mean "no filter" and are passed down as None.
    status_filter = review_status or None
    area_filter = practice_area or None
    found = await db.list_halachot(
        case_law_id=precedent_filter,
        review_status=status_filter,
        practice_area=area_filter,
        limit=limit,
        offset=offset,
        exclude_low_quality=exclude_low_quality,
        order_by_priority=order_by_priority,
        cluster=cluster,
        include_equivalents=include_equivalents,
    )
    return {"items": found, "count": len(found)}
class EquivalentLinkRequest(BaseModel):
    # Body of POST /api/halachot/{halacha_id}/equivalents.
    # Id of the halacha to link to; the handler parses it as a UUID.
    other_id: str
    # Optional free-text note stored with the link; empty by default.
    note: str = ""
@app.get("/api/halachot/{halacha_id}/equivalents")
async def halacha_equivalents_list(halacha_id: str):
    """Return the cross-precedent parallel-authority links of one halacha (#84.2).

    Raises:
        HTTPException: 400 when ``halacha_id`` is not a valid UUID.
    """
    try:
        halacha_uuid = UUID(halacha_id)
    except ValueError:
        raise HTTPException(400, "halacha_id לא תקין")

    links = await db.list_equivalent_for_halacha(halacha_uuid)
    return {"items": links}
@app.post("/api/halachot/{halacha_id}/equivalents")
async def halacha_equivalents_link(halacha_id: str, req: EquivalentLinkRequest):
    """Record that two halachot state the same principle across precedents (#84.2).

    The link is created on behalf of the chair (``created_by="chair"``).

    Raises:
        HTTPException: 400 when either id is not a valid UUID, or when the db
            layer refuses the link.
    """
    try:
        first_id = UUID(halacha_id)
        second_id = UUID(req.other_id)
    except ValueError:
        raise HTTPException(400, "מזהה הלכה לא תקין")

    linked = await db.link_equivalent_halachot(
        first_id, second_id, note=req.note, created_by="chair"
    )
    if linked:
        return {"ok": True}
    raise HTTPException(
        400, "לא ניתן לקשר — אותה הלכה או שתי הלכות מאותו פסק (קישור-מקביל הוא חוצה-פסקים)")
@app.delete("/api/halachot/{halacha_id}/equivalents/{other_id}")
async def halacha_equivalents_unlink(halacha_id: str, other_id: str):
    """Remove the equivalence link between two halachot.

    Responds with 400 when either id is not a valid UUID; otherwise ``ok``
    carries whatever ``db.unlink_equivalent_halachot`` returned.
    """
    try:
        first_id = UUID(halacha_id)
        second_id = UUID(other_id)
    except ValueError:
        raise HTTPException(400, "מזהה הלכה לא תקין")
    removed = await db.unlink_equivalent_halachot(first_id, second_id)
    return {"ok": removed}
# ── Gold-set tagging (#81.7 / #81.8) ─────────────────────────────────────────
class GoldsetSampleRequest(BaseModel):
    """Request body for ``POST /api/goldset/sample``.

    The endpoint forwards all three fields unchanged to
    ``db.goldset_create_sample``.
    """

    # Sample size handed to the sampler.
    n: int = 150
    # Name of the gold-set batch to create or extend.
    batch: str = "default"
    # NOTE(review): presumably rebuilds the batch from scratch — confirm in db.py.
    reset: bool = False
class GoldsetTagRequest(BaseModel):
    """Request body for ``PATCH /api/goldset/{goldset_id}``.

    Every tag field defaults to ``None`` so a request may carry a single tag.
    NOTE(review): ``None`` presumably means "leave this tag unchanged" —
    confirm against ``db.goldset_tag``.
    """

    # Human verdict: is the item a holding?
    is_holding: bool | None = None
    # Human-chosen rule type; the endpoint validates it against a fixed list.
    correct_type: str | None = None
    # Human verdict on whether the quote is complete.
    quote_complete: bool | None = None
    # Identifier of the person tagging.
    tagged_by: str = "chair"
@app.get("/api/goldset")
async def goldset_list_ep(batch: str = "default"):
    """Return the gold-set tagging queue for ``batch``.

    Items carry the halacha content, the machine labels and the human tags;
    the batch name is echoed back alongside them.
    """
    items = await db.goldset_list(batch)
    return {"items": items, "batch": batch}
@app.post("/api/goldset/sample")
async def goldset_sample_ep(req: GoldsetSampleRequest):
    """Create or extend a stratified gold-set batch for tagging (#81.7).

    Returns the result of ``db.goldset_create_sample`` unchanged.
    """
    outcome = await db.goldset_create_sample(
        n=req.n,
        batch=req.batch,
        reset=req.reset,
    )
    return outcome
@app.get("/api/goldset/score")
async def goldset_score_ep(batch: str = "default"):
    """Score the extraction validators against the human tags of ``batch`` (#81.8).

    Returns the result of ``db.goldset_score`` unchanged.
    """
    scores = await db.goldset_score(batch)
    return scores
@app.patch("/api/goldset/{goldset_id}")
async def goldset_tag_ep(goldset_id: str, req: GoldsetTagRequest):
    """Store one human tag on a gold-set item.

    Responds with 400 for a malformed id or an unknown ``correct_type``, and
    with 404 when the database layer reports no matching item.
    """
    try:
        item_id = UUID(goldset_id)
    except ValueError:
        raise HTTPException(400, "מזהה לא תקין")

    valid_types = (
        "binding", "interpretive", "obiter", "application", "procedural", "persuasive",
    )
    chosen_type = req.correct_type
    # A missing/empty type is allowed — only a non-empty unknown one is rejected.
    if chosen_type and chosen_type not in valid_types:
        raise HTTPException(400, "correct_type לא תקין")

    saved = await db.goldset_tag(
        item_id,
        is_holding=req.is_holding,
        correct_type=chosen_type,
        quote_complete=req.quote_complete,
        tagged_by=req.tagged_by,
    )
    if saved:
        return {"ok": True}
    raise HTTPException(404, "פריט לא נמצא")
@app.patch("/api/halachot/{halacha_id}")
async def halacha_update(halacha_id: str, req: HalachaUpdateRequest):
    """Approve, reject or edit a single halacha (chair review queue).

    Responds with 400 for a malformed id or an unknown ``review_status``, and
    with 404 when no row was updated. On success the updated row is returned.
    """
    try:
        target_id = UUID(halacha_id)
    except ValueError:
        raise HTTPException(400, "halacha_id לא תקין")

    new_status = req.review_status
    # An empty/absent status is allowed (pure content edit).
    if new_status and new_status not in db.HALACHA_REVIEW_STATUSES:
        raise HTTPException(400, "review_status לא תקין")

    updated = await db.update_halacha(
        halacha_id=target_id,
        review_status=new_status,
        reviewer=req.reviewer or "",
        rule_statement=req.rule_statement,
        reasoning_summary=req.reasoning_summary,
        subject_tags=req.subject_tags,
        practice_areas=req.practice_areas,
    )
    if updated:
        return updated
    raise HTTPException(404, "הלכה לא נמצאה")
@app.post("/api/halachot/batch")
async def halacha_batch_review(req: HalachaBatchReviewRequest):
    """Apply a single review status to many halachot in one call (#84 group action).

    Responds with 400 for an unknown status or when any id in the list is not
    a valid UUID. An empty id list short-circuits to ``{"updated": 0}``.
    """
    new_status = req.review_status
    if new_status not in db.HALACHA_REVIEW_STATUSES:
        raise HTTPException(400, "review_status לא תקין")
    if not req.halacha_ids:
        return {"updated": 0}

    try:
        # Round-trip through UUID to validate and normalise every id.
        normalized_ids = [str(UUID(raw_id)) for raw_id in req.halacha_ids]
    except ValueError:
        raise HTTPException(400, "halacha_id לא תקין ברשימה")

    affected = await db.update_halachot_batch(
        normalized_ids,
        review_status=new_status,
        reviewer=req.reviewer or "",
    )
    return {"updated": affected}
# ── Missing Precedents (TaskMaster #35) ────────────────────────────
# Track citations from party briefs that aren't yet in the precedent
# corpus. Researcher logs gaps; chair closes them by uploading the
# actual decision via /api/precedent-library/upload or
# /api/internal-decisions/upload, then links via the upload endpoint
# here which delegates to one of those depending on the citation type.
# Party roles a missing-precedent citation can be attributed to.
_ALLOWED_MP_PARTIES = {
    "appellant",
    "respondent",
    "committee",
    "permit_applicant",
    "unknown",
}
# Status values accepted when filtering missing precedents.
_ALLOWED_MP_STATUS = {
    "open",
    "uploaded",
    "closed",
    "irrelevant",
}
class MissingPrecedentCreate(BaseModel):
    """Request body for logging a new missing precedent."""

    # The citation text; the create endpoint rejects a blank value.
    citation: str
    # Number of the case in which the citation appeared (the cited-in case).
    case_number: str = ""  # cited-in case
    # Id of the citing document; the create endpoint parses it as a UUID.
    cited_in_document_id: str | None = None
    # Role of the party that cited the precedent.
    cited_by_party: Literal[
        "appellant", "respondent", "committee", "permit_applicant", "unknown",
    ] = "unknown"
    cited_by_party_name: str | None = None
    legal_topic: str | None = None
    legal_issue: str | None = None
    claim_quote: str | None = None
    case_name: str | None = None
    notes: str | None = None
class MissingPrecedentPatch(BaseModel):
    """Request body for partially updating a missing-precedent row.

    Every field is optional; the update endpoint applies only fields that
    were sent and are not ``None``.
    """

    legal_topic: str | None = None
    legal_issue: str | None = None
    notes: str | None = None
    # Role of the party that cited the precedent.
    cited_by_party: Literal[
        "appellant", "respondent", "committee", "permit_applicant", "unknown",
    ] | None = None
    cited_by_party_name: str | None = None
    case_name: str | None = None
    # New status of the row, restricted to the four listed values.
    status: Literal["open", "uploaded", "closed", "irrelevant"] | None = None
    citation: str | None = None
    claim_quote: str | None = None
def _is_internal_committee_citation(citation: str) -> bool:
    """Tell whether ``citation`` begins with an appeals-committee prefix.

    Committee (ועדת ערר) decisions must be ingested through the
    internal-decision upload so they get chair_name + district. The legacy
    library upload does not enforce those fields, which leaves the records
    un-searchable by chair.

    Surrounding whitespace is ignored before the prefix is checked.
    """
    # str.startswith accepts a tuple and tests every prefix in one call.
    return citation.strip().startswith(
        ("ערר ", "ערר(", "בל\"מ ", "בל\"מ(", "ARAR "),
    )
@app.post("/api/missing-precedents")
async def missing_precedent_create(
    req: MissingPrecedentCreate, background_tasks: BackgroundTasks,
):
    """Log a new missing precedent, or return the existing row on a duplicate.

    Duplicates are detected by (citation, cited-in case); a duplicate POST
    returns the stored row with ``_duplicate`` set to ``True``.

    On a fresh insert that is tied to a case number, a background task emits a
    webhook to the Paperclip plugin so it can ask Daphna, through an
    ``askUserQuestions`` interaction, whether to upload the missing precedent.

    Responds with 400 for a blank citation or a malformed
    ``cited_in_document_id``, and with 404 when the cited-in case is unknown.
    """
    citation = req.citation.strip()
    if not citation:
        raise HTTPException(400, "citation חובה")

    cited_in_case_id: UUID | None = None
    webhook_company_id: str | None = None
    webhook_case_number = req.case_number.strip()
    if webhook_case_number:
        case_row = await db.get_case_by_number(webhook_case_number)
        if not case_row:
            raise HTTPException(404, f"תיק לא נמצא: {req.case_number}")
        cited_in_case_id = UUID(case_row["id"])
        # The first character of the case number selects the Paperclip company.
        leading = webhook_case_number[:1]
        if leading == "1":
            webhook_company_id = PAPERCLIP_COMPANIES["licensing"]
        elif leading in ("8", "9"):
            webhook_company_id = PAPERCLIP_COMPANIES["betterment"]

    document_id: UUID | None = None
    if req.cited_in_document_id:
        try:
            document_id = UUID(req.cited_in_document_id)
        except ValueError:
            raise HTTPException(400, "cited_in_document_id לא תקין")

    duplicate = await db.find_missing_precedent_by_citation(
        citation=citation,
        case_id=cited_in_case_id,
    )
    if duplicate:
        return {**duplicate, "_duplicate": True}

    created = await db.create_missing_precedent(
        citation=citation,
        case_name=req.case_name,
        cited_in_case_id=cited_in_case_id,
        cited_in_document_id=document_id,
        cited_by_party=req.cited_by_party,
        cited_by_party_name=req.cited_by_party_name,
        legal_topic=req.legal_topic,
        legal_issue=req.legal_issue,
        claim_quote=req.claim_quote,
        notes=req.notes,
    )

    # Trigger the plugin to ask Daphna via an askUserQuestions interaction.
    if webhook_case_number and created.get("id"):
        background_tasks.add_task(
            paperclip_api.emit_missing_precedent_webhook,
            case_number=webhook_case_number,
            missing_precedent_id=str(created["id"]),
            citation=citation,
            cited_by_party=req.cited_by_party,
            cited_by_party_name=req.cited_by_party_name,
            legal_topic=req.legal_topic,
            legal_issue=req.legal_issue,
            company_id=webhook_company_id,
        )
    return created
@app.get("/api/missing-precedents")
async def missing_precedents_list(
    status: str = "",
    case_id: str = "",
    case_number: str = "",
    legal_topic: str = "",
    limit: int = 200,
    offset: int = 0,
):
    """List missing precedents, optionally filtered by status, case or topic.

    ``case_id`` takes precedence over ``case_number`` when both are given.
    ``limit`` is clamped to 1..500 and ``offset`` to a minimum of 0.

    Besides the page of rows, the response carries per-status counts over the
    whole table (``by_status``) and the number of open rows (``total_open``).

    Responds with 400 for an unknown status or a malformed ``case_id``, and
    with 404 when ``case_number`` matches no case.
    """
    status_filter = status.strip() or None
    if status_filter and status_filter not in _ALLOWED_MP_STATUS:
        raise HTTPException(400, f"status לא תקין: {status}")

    case_filter: UUID | None = None
    raw_case_id = case_id.strip()
    raw_case_number = case_number.strip()
    if raw_case_id:
        try:
            case_filter = UUID(raw_case_id)
        except ValueError:
            raise HTTPException(400, "case_id לא תקין")
    elif raw_case_number:
        case_row = await db.get_case_by_number(raw_case_number)
        if not case_row:
            raise HTTPException(404, f"תיק לא נמצא: {case_number}")
        case_filter = UUID(case_row["id"])

    page_size = max(1, min(int(limit), 500))
    page_offset = max(0, int(offset))
    rows = await db.list_missing_precedents(
        status=status_filter,
        case_id=case_filter,
        legal_topic=legal_topic.strip() or None,
        limit=page_size,
        offset=page_offset,
    )

    # Counters useful for the sidebar badge; they ignore the filters above.
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        status_rows = await conn.fetch(
            "SELECT status, COUNT(*) AS n FROM missing_precedents GROUP BY status"
        )
    by_status = {}
    for record in status_rows:
        by_status[record["status"]] = record["n"]

    return {
        "items": rows,
        "count": len(rows),
        "by_status": by_status,
        "total_open": by_status.get("open", 0),
    }
@app.get("/api/missing-precedents/{mp_id}")
async def missing_precedent_get(mp_id: str):
    """Fetch one missing-precedent row by id.

    Responds with 400 for a malformed id and 404 when no row exists.
    """
    try:
        record_id = UUID(mp_id)
    except ValueError:
        raise HTTPException(400, "id לא תקין")
    record = await db.get_missing_precedent(record_id)
    if record:
        return record
    raise HTTPException(404, "רשומה לא נמצאה")
@app.patch("/api/missing-precedents/{mp_id}")
async def missing_precedent_update(mp_id: str, req: MissingPrecedentPatch):
    """Apply a partial update to a missing-precedent row and return the row.

    Only fields that were sent and are not ``None`` are applied. When nothing
    is left to apply, the current row is returned unchanged.

    Responds with 400 for a malformed id or when the database layer rejects
    the update with ``ValueError``, and with 404 when the row does not exist.
    """
    try:
        record_id = UUID(mp_id)
    except ValueError:
        raise HTTPException(400, "id לא תקין")

    supplied = req.model_dump(exclude_unset=True)
    changes = {name: value for name, value in supplied.items() if value is not None}

    if changes:
        try:
            record = await db.update_missing_precedent(record_id, **changes)
        except ValueError as e:
            raise HTTPException(400, str(e))
    else:
        # Nothing to change — behave like a plain read.
        record = await db.get_missing_precedent(record_id)

    if not record:
        raise HTTPException(404, "רשומה לא נמצאה")
    return record
@app.delete("/api/missing-precedents/{mp_id}")
async def missing_precedent_delete(mp_id: str):
    """Delete one missing-precedent row by id.

    Responds with 400 for a malformed id and 404 when no row was deleted.
    """
    try:
        record_id = UUID(mp_id)
    except ValueError:
        raise HTTPException(400, "id לא תקין")

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        command_tag = await conn.execute(
            "DELETE FROM missing_precedents WHERE id = $1", record_id,
        )
    # The last word of the returned command tag is the affected-row count.
    affected = int(command_tag.split()[-1])
    if affected <= 0:
        raise HTTPException(404, "רשומה לא נמצאה")
    return {"deleted": True, "id": mp_id}
@app.post("/api/missing-precedents/{mp_id}/upload")
async def missing_precedent_upload(
    mp_id: str,
    file: UploadFile = File(...),
    case_number: str = Form(""),  # for internal-committee path
    chair_name: str = Form(""),
    district: str = Form(""),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),
    is_binding: bool = Form(True),
    headnote: str = Form(""),
    summary: str = Form(""),
    precedent_level: str = Form(""),
    source_type: str = Form(""),
):
    """Upload the decision file behind a missing-precedent and link it.

    Routes to ingest_internal_decision if the citation looks like a
    committee decision (ערר / בל"מ prefix), otherwise to ingest_precedent.
    Once the case_law row is created, the missing_precedents row is marked
    status='closed' with linked_case_law_id pointing to the new row.

    The upload is staged under ``UPLOAD_DIR`` and the staged file is always
    removed before returning, whether the request succeeds or fails.

    Returns:
        A dict with the closed ``missing_precedent`` row, the new
        ``case_law_id`` and the ``route`` taken (``internal_committee`` or
        ``external_upload``).

    Raises:
        HTTPException: 400 for a malformed id, unsupported file type, or an
            invalid ``practice_area`` / ``source_type``; 404 when the row is
            missing; 409 when the row is already closed and linked; 413 when
            the file exceeds ``MAX_FILE_SIZE``; 500 when ingestion returns no
            ``case_law_id`` or closing the row fails.
    """
    try:
        uid = UUID(mp_id)
    except ValueError:
        raise HTTPException(400, "id לא תקין")
    mp = await db.get_missing_precedent(uid)
    if not mp:
        raise HTTPException(404, "רשומה לא נמצאה")
    if mp["status"] in {"closed", "uploaded"} and mp.get("linked_case_law_id"):
        raise HTTPException(409, "הרשומה כבר נסגרה — הסר קישור לפני העלאה חוזרת")

    # ``file.filename`` is client-controlled. Keep only its final path
    # component (for both "/" and "\" separators) so the staged file can
    # never be created outside UPLOAD_DIR.
    safe_name = Path((file.filename or "").replace("\\", "/")).name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    # subject_tags arrives as a JSON string; anything that is not a JSON
    # list degrades to "no tags" rather than failing the upload.
    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    citation = mp["citation"]
    is_committee = _is_internal_committee_citation(citation)
    case_law_id: str | None = None
    closed: dict | None = None

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"mp_{uuid4().hex[:8]}_{safe_name}"
    try:
        # Staging happens inside the try so the ``finally`` below also
        # cleans up when the read fails midway or the size limit is hit.
        size = 0
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)

        if is_committee:
            # The DB CHECK forces chair_name + district to be non-empty for
            # internal_committee rows. The UX goal is "upload file + citation
            # only" — so if the user didn't fill those, infer district from
            # the citation text (often contains the committee name, e.g.
            # "ועדות ערר - תכנון ובנייה תל אביב-יפו") and fall back to a
            # placeholder. The metadata extractor wakeup fired below will
            # overwrite both placeholders once the LLM reads the file.
            resolved_chair = chair_name.strip() or PLACEHOLDER_PENDING_EXTRACTION
            resolved_district = (
                district.strip()
                or int_decisions_service._district_from_court(court)
                or int_decisions_service._district_from_court(citation)
                or PLACEHOLDER_PENDING_EXTRACTION
            )
            # case_number for the committee decision (not the cited-in case)
            committee_case_number = case_number.strip() or citation
            result = await int_decisions_service.ingest_internal_decision(
                case_number=committee_case_number,
                case_name=(case_name.strip() or mp.get("case_name") or "").strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                chair_name=resolved_chair,
                district=resolved_district,
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                summary=summary.strip(),
                file_path=staged,
            )
            case_law_id = (
                result.get("case_law_id") if isinstance(result, dict) else None
            )
        else:
            if practice_area and practice_area not in _PRACTICE_AREAS:
                raise HTTPException(400, "practice_area לא תקין")
            if source_type and source_type not in _SOURCE_TYPES:
                raise HTTPException(400, "source_type לא תקין")
            result = await plib_service.ingest_precedent(
                file_path=staged,
                citation=citation,
                case_name=(case_name.strip() or mp.get("case_name") or "").strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                source_type=source_type or "court_ruling",
                precedent_level=precedent_level,
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                headnote=headnote.strip(),
                summary=summary.strip(),
            )
            case_law_id = (
                result.get("case_law_id") if isinstance(result, dict) else None
            )

        if not case_law_id:
            raise HTTPException(500, "לא התקבל case_law_id מההעלאה")

        try:
            closed = await db.close_missing_precedent(
                mp_id=uid,
                linked_case_law_id=UUID(case_law_id),
                notes=mp.get("notes"),
                status="closed",
            )
        except Exception as e:
            logger.exception("missing-precedent close failed")
            raise HTTPException(500, f"קישור הרשומה נכשל: {e}") from e

        # Fire metadata-extraction wakeup so the placeholder fields above
        # (and any other empty user-supplied fields) get filled in from the
        # file's text. Best-effort: mirrors the precedent_library_upload
        # contract — failures are logged, not surfaced.
        try:
            if is_committee:
                # Without an explicit practice area, route by the first
                # character of the committee case number.
                routing_pa = practice_area or {
                    "1": "rishuy_uvniya", "8": "betterment_levy", "9": "compensation_197",
                }.get(committee_case_number.strip()[:1], "")
            else:
                routing_pa = practice_area
            wake = await pc_wake_for_precedent_extraction(
                case_law_id=case_law_id,
                citation=citation,
                practice_area=routing_pa,
            )
            if not wake.get("ok"):
                logger.warning(
                    "missing-precedent %s: extraction wakeup did not queue (%s) — "
                    "halachot stay pending until precedent_process_pending runs",
                    case_law_id, wake.get("skipped") or wake.get("error"),
                )
        except Exception:
            logger.exception(
                "missing-precedent: precedent-extraction wakeup failed (non-fatal)"
            )
    finally:
        staged.unlink(missing_ok=True)

    return {
        "missing_precedent": closed,
        "case_law_id": case_law_id,
        "route": "internal_committee" if is_committee else "external_upload",
    }
# ── RAG telemetry / nDCG dashboard ────────────────────────────────────
# Backs the /admin/rag-metrics page. The heavy aggregation lives in
# ``scripts/compute_ndcg.py`` — we re-use its functions here so the API
# response stays in lock-step with the CLI tool.
@app.get("/api/admin/rag-metrics")
async def api_rag_metrics(weeks: int = 12, k: int = 10):
    """Return nDCG@k aggregates for the RAG retrieval feedback loop.

    Args:
        weeks: window for "recent" metrics (default 12).
        k: nDCG cutoff (default 10).

    Responds with 500 when the computation raises.
    """
    # Late import — keeps the path-extension to scripts/ local to this route.
    scripts_path = str(Path(__file__).resolve().parent.parent / "scripts")
    if scripts_path not in sys.path:
        sys.path.insert(0, scripts_path)
    import compute_ndcg  # type: ignore

    try:
        return await compute_ndcg.compute(weeks=weeks, k=k)
    except Exception as e:
        logger.exception("rag-metrics compute failed")
        raise HTTPException(500, f"חישוב מטריקות נכשל: {e}") from e
@app.post("/api/admin/rag-metrics/infer")
async def api_rag_metrics_infer(limit: int | None = None):
    """Run relevance auto-inference over the finalized cases.

    For every finalized case, its cited precedents are marked as
    ``relevance_score=3`` against any search_log where they appeared in the
    top-K. Idempotent. ``limit`` is forwarded to the telemetry service.

    Responds with 500 when the inference raises.
    """
    from legal_mcp.services import telemetry as telem_svc

    try:
        return await telem_svc.infer_relevance_for_all_finalized_cases(limit=limit)
    except Exception as e:
        logger.exception("rag-metrics auto-inference failed")
        raise HTTPException(500, f"auto-inference נכשל: {e}") from e