feat(training): Style Studio — upload, rich corpus, lessons, curator portrait, chat
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 2m7s

Six-phase upgrade of /training from a read-only dashboard into a full
Style Studio for managing Daphna's style corpus.

- Upload Sheet on /training: file → proofread preview → commit (no more
  CLI-only `upload-training` skill).
- Rich corpus metadata: GET /api/training/corpus returns summary, outcome,
  key_principles, page_count, parties (regex), legal_citation, lessons_count.
  PATCH endpoint for chair edits. CorpusDetailDrawer with 4 tabs
  (details / content / lessons / patterns) replaces the bare table row.
- LLM metadata enrichment: style_metadata_extractor + MCP tools
  (style_corpus_enrich, style_corpus_pending_enrichment) fill
  summary / outcome / key_principles via claude_session (free, host-side).
- Per-decision lessons: new decision_lessons table + 4 REST endpoints +
  LessonsTab in drawer; hermes-curator now auto-posts findings as
  decision_lessons(source=curator).
- Curator Portrait tab: prompt rendered with link to Gitea, recent
  curator findings, style_analyzer training prompts, propose-change
  form that writes proposals to data/curator-proposals/ for manual
  chair review (no auto-mutation of the agent file).
- Style chat tab: SSE-streamed conversations with the style agent.
  New host-side pm2 service (legal-chat-service, port 8770) wraps
  claude CLI with stream-json + --resume continuation; FastAPI proxies
  via host.docker.internal. Zero API cost — uses chaim's claude.ai
  subscription. chat_conversations + chat_messages persist history.

Architecture: keeps the existing rule that claude_session only runs
on the host (not the container). The new legal-chat-service is the
canonical bridge between the container and the local CLI for the chat
feature; everything else (upload, metadata, lessons) stays within the
container's existing capabilities.

Audit script (scripts/audit_training_corpus.py) included for verifying
which corpus rows still need enrichment.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 10:06:22 +00:00
parent 0629f19d5f
commit bb0cd7c6a2
23 changed files with 4568 additions and 75 deletions

View File

@@ -0,0 +1,328 @@
"use client";
/*
* Upload a Daphna decision into the style corpus, from the /training page.
*
* The flow is three explicit steps inside the same sheet:
* 1. file picker → POST /api/upload (gets sanitized filename)
* 2. preview → POST /api/training/analyze (proofread + auto-extracted meta)
* chair can correct decision_number / decision_date / subjects
* 3. commit → POST /api/training/upload (background task)
* progress watched via SSE; on completion we invalidate
* corpus + style-report so the new row appears.
*
* The Sheet UX mirrors precedent-upload-sheet.tsx: same dir="rtl", same
* loading + error patterns, same toast on success. The reason this isn't
* a single one-click upload is that style-corpus rows are write-once
* (we don't allow editing full_text), so the chair MUST see the proofread
* preview before committing — otherwise a bad OCR/proofread can silently
* pollute the style portrait.
*/
import { useEffect, useRef, useState } from "react";
import { Upload, Loader2, CheckCircle2, AlertCircle, FileText } from "lucide-react";
import { toast } from "sonner";
import { useQueryClient } from "@tanstack/react-query";
import {
  Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription,
} from "@/components/ui/sheet";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Progress } from "@/components/ui/progress";
import { Badge } from "@/components/ui/badge";
import {
  trainingKeys,
  useAnalyzeTraining,
  useCommitTrainingUpload,
  useUploadFile,
  type AnalyzeTrainingResponse,
} from "@/lib/api/training";
import { useProgress } from "@/lib/api/documents";
/** Extensions offered by the file picker, joined into an `accept` attribute value. */
const ACCEPT = [".pdf", ".docx", ".doc", ".rtf", ".txt", ".md"].join(",");

/** Props of the upload sheet: the parent owns the open/closed flag. */
type Props = {
  open: boolean;
  onOpenChange: (open: boolean) => void;
};

/** The phases the upload sheet moves through, in rough chronological order. */
type Stage =
  | "pick"
  | "analyzing"
  | "preview"
  | "committing"
  | "done"
  | "error";
/**
 * Sheet that adds a decision to the style corpus in three explicit steps:
 * pick a file (upload + analyze), review the proofread preview and correct
 * the extracted metadata, then commit and watch the background task.
 *
 * State-machine notes:
 * - Closing the sheet resets all local state and invalidates any request
 *   still in flight, so a late response can never repopulate a sheet the
 *   user already dismissed. The server-side work itself is not cancelled.
 * - A failed commit request (e.g. a duplicate decision number) returns to the
 *   preview form with the error shown inline, so the metadata can be fixed
 *   without re-uploading the file.
 * - A failed analyze call or a failed background task lands on the error
 *   screen; "try another file" clears the failed task before going back to
 *   the picker.
 *
 * @param open          Whether the sheet is visible (controlled by the parent).
 * @param onOpenChange  Called with the requested visibility; also invoked with
 *                      `false` about 1.5s after a successful commit.
 */
export function TrainingUploadDialog({ open, onOpenChange }: Props) {
  const [stage, setStage] = useState<Stage>("pick");
  const [file, setFile] = useState<File | null>(null);
  const [analysis, setAnalysis] = useState<AnalyzeTrainingResponse | null>(null);
  // Editable copies of the auto-extracted metadata.
  const [decisionNumber, setDecisionNumber] = useState("");
  const [decisionDate, setDecisionDate] = useState("");
  const [subjectsRaw, setSubjectsRaw] = useState("");
  const [title, setTitle] = useState("");
  const [taskId, setTaskId] = useState<string | null>(null);
  const [errorMsg, setErrorMsg] = useState("");

  const uploadFile = useUploadFile();
  const analyze = useAnalyzeTraining();
  const commit = useCommitTrainingUpload();
  const progress = useProgress(taskId);
  const qc = useQueryClient();

  // Bumped every time the sheet closes. Async handlers capture the value
  // before awaiting and drop their result if it changed in the meantime.
  const sessionRef = useRef(0);
  // Decision label captured at commit time, so the success toast does not
  // make the progress effect depend on live form state.
  const committedLabelRef = useRef("");
  // Latest onOpenChange, so a parent that passes a fresh callback on every
  // render cannot re-trigger the completion effect (duplicate toasts).
  const onOpenChangeRef = useRef(onOpenChange);
  useEffect(() => {
    onOpenChangeRef.current = onOpenChange;
  }, [onOpenChange]);

  // Reset everything when the sheet closes — important because Sheet keeps
  // the component mounted between opens. The cascade-render warning is the
  // intended behavior (reset is the side effect we want).
  useEffect(() => {
    if (open) return;
    sessionRef.current += 1;
    /* eslint-disable react-hooks/set-state-in-effect */
    setStage("pick"); setFile(null); setAnalysis(null);
    setDecisionNumber(""); setDecisionDate(""); setSubjectsRaw("");
    setTitle(""); setTaskId(null); setErrorMsg("");
    /* eslint-enable react-hooks/set-state-in-effect */
  }, [open]);

  // Watch the background task. Only primitives are used as dependencies so
  // the effect fires once per status transition of the current task, not on
  // every unrelated re-render.
  const progressStatus = progress?.status;
  const progressError = progress?.error;
  useEffect(() => {
    // No task being watched (initial state, after reset, or after retry).
    if (taskId === null) return;
    /* eslint-disable react-hooks/set-state-in-effect */
    if (progressStatus === "completed") {
      // Refresh corpus + report so the new row and updated stats show up.
      qc.invalidateQueries({ queryKey: trainingKeys.corpus() });
      qc.invalidateQueries({ queryKey: trainingKeys.report() });
      setStage("done");
      toast.success(`החלטה ${committedLabelRef.current} נוספה לקורפוס`);
      // Leave the success card visible briefly, then auto-close.
      const t = window.setTimeout(() => onOpenChangeRef.current(false), 1500);
      return () => window.clearTimeout(t);
    }
    if (progressStatus === "failed") {
      setStage("error");
      setErrorMsg(progressError || "כשל בעיבוד");
    }
    /* eslint-enable react-hooks/set-state-in-effect */
  }, [taskId, progressStatus, progressError, qc]);

  /** Upload the chosen file, run analysis, and seed the editable fields. */
  const onPickFile = async (f: File | null) => {
    setFile(f);
    setErrorMsg("");
    if (!f) return;
    const session = sessionRef.current;
    setStage("analyzing");
    try {
      const { filename } = await uploadFile.mutateAsync(f);
      // Sheet was closed while uploading: skip the analyze call entirely.
      if (sessionRef.current !== session) return;
      const result = await analyze.mutateAsync(filename);
      if (sessionRef.current !== session) return;
      setAnalysis(result);
      setDecisionNumber(result.decision_number);
      setDecisionDate(result.decision_date);
      setSubjectsRaw(result.subject_categories.join(", "));
      // Default title from the original filename stem (chair can override).
      const stem = f.name.replace(/\.[^.]+$/, "");
      setTitle(stem);
      setStage("preview");
    } catch (e) {
      if (sessionRef.current !== session) return;
      setStage("error");
      setErrorMsg(e instanceof Error ? e.message : "כשל בקריאת הקובץ");
    }
  };

  /** Submit the (possibly corrected) metadata and start watching the task. */
  const onCommit = async () => {
    if (!analysis) return;
    const session = sessionRef.current;
    committedLabelRef.current = decisionNumber || analysis.decision_number || "";
    setStage("committing");
    setErrorMsg("");
    try {
      const subjects = subjectsRaw
        .split(/[,،]/)
        .map((s) => s.trim())
        .filter(Boolean);
      const res = await commit.mutateAsync({
        filename: analysis.filename,
        decision_number: decisionNumber.trim(),
        decision_date: decisionDate || "",
        subject_categories: subjects,
        title: title.trim() || undefined,
      });
      // Sheet was closed mid-request: the server keeps processing, but this
      // (already reset) sheet must not start watching the task.
      if (sessionRef.current !== session) return;
      setTaskId(res.task_id);
    } catch (e) {
      if (sessionRef.current !== session) return;
      // 409 = duplicate decision_number — surface the backend's Hebrew
      // message inline and stay on the form so the chair can correct it.
      setStage("preview");
      setErrorMsg(e instanceof Error ? e.message : "כשל בהעלאה");
    }
  };

  /** Leave the error screen and start over with a clean picker. */
  const onRetry = () => {
    // Drop the failed task and the old analysis too; otherwise the stale
    // "failed" progress would immediately bounce the next attempt back here.
    setTaskId(null);
    setAnalysis(null);
    setFile(null);
    setErrorMsg("");
    setStage("pick");
  };

  const isProcessing = stage === "analyzing" || stage === "committing";
  const progressStep = (progress as { step?: string } | null)?.step;

  return (
    <Sheet open={open} onOpenChange={onOpenChange}>
      <SheetContent side="left" className="w-full sm:max-w-2xl overflow-y-auto" dir="rtl">
        <SheetHeader>
          <SheetTitle className="text-navy">העלאת החלטה לקורפוס הסגנון</SheetTitle>
          <SheetDescription className="text-ink-muted">
            הקובץ יעבור הגהה (סינון Nevo, ניקוד), חילוץ אוטומטי של מספר תיק, תאריך
            ונושאים, ויוטמע ב-style_corpus עם chunks ו-embeddings. תוכל לתקן את
            פרטי המטא-דאטה לפני שמירה.
          </SheetDescription>
        </SheetHeader>
        <div className="px-6 pb-6 mt-4 space-y-4">
          {/* Stage: pick a file */}
          {stage === "pick" && (
            <div className="space-y-2">
              <Label htmlFor="t-file">קובץ ההחלטה (PDF / DOCX / DOC / RTF / TXT / MD)</Label>
              <Input
                id="t-file" type="file" accept={ACCEPT}
                onChange={(e) => onPickFile(e.target.files?.[0] ?? null)}
              />
              <p className="text-[0.78rem] text-ink-muted">
                המערכת תחלץ מהקובץ את מספר התיק, התאריך והנושאים. תוכל לערוך
                לפני השמירה.
              </p>
            </div>
          )}
          {/* Stage: analyzing the file */}
          {stage === "analyzing" && (
            <div className="rounded-lg border border-rule bg-rule-soft/40 p-6 space-y-2 text-center">
              <Loader2 className="w-5 h-5 animate-spin mx-auto text-navy" />
              <p className="text-sm text-navy">מבצע הגהה וחילוץ מטא-דאטה</p>
              <p className="text-[0.78rem] text-ink-muted">
                {file?.name}
              </p>
            </div>
          )}
          {/* Stage: preview + editable metadata */}
          {stage === "preview" && analysis && (
            <form
              className="space-y-4"
              onSubmit={(e) => { e.preventDefault(); onCommit(); }}
            >
              <div className="rounded-lg border border-rule bg-surface px-4 py-3">
                <h3 className="text-[0.78rem] uppercase tracking-wider text-gold-deep font-semibold mb-2">
                  תצוגה מקדימה של הטקסט הנקי
                </h3>
                <p className="text-sm text-ink leading-relaxed line-clamp-6 whitespace-pre-wrap">
                  {analysis.preview}
                </p>
                <div className="mt-2 flex items-center gap-3 text-[0.72rem] text-ink-muted tabular-nums">
                  <span className="flex items-center gap-1">
                    <FileText className="w-3 h-3" />
                    {analysis.chars.toLocaleString("he-IL")} תווים
                  </span>
                </div>
              </div>
              <div className="grid grid-cols-2 gap-3">
                <div className="space-y-1">
                  <Label htmlFor="t-decision-number">מספר ההחלטה</Label>
                  <Input
                    id="t-decision-number"
                    value={decisionNumber}
                    onChange={(e) => setDecisionNumber(e.target.value)}
                    placeholder="1130-25"
                    dir="rtl"
                  />
                </div>
                <div className="space-y-1">
                  <Label htmlFor="t-decision-date">תאריך ההחלטה</Label>
                  <Input
                    id="t-decision-date" type="date"
                    value={decisionDate}
                    onChange={(e) => setDecisionDate(e.target.value)}
                  />
                </div>
              </div>
              <div className="space-y-1">
                <Label htmlFor="t-title">כותרת קצרה (אופציונלי)</Label>
                <Input
                  id="t-title" value={title}
                  onChange={(e) => setTitle(e.target.value)}
                  placeholder="ARAR-25-1130 - כרמל יצחק" dir="rtl"
                />
              </div>
              <div className="space-y-1">
                <Label htmlFor="t-subjects">נושאים (מופרדים בפסיקים)</Label>
                <Input
                  id="t-subjects" value={subjectsRaw}
                  onChange={(e) => setSubjectsRaw(e.target.value)}
                  placeholder="חניה, קווי בניין, שימוש חורג" dir="rtl"
                />
                {analysis.subject_categories.length > 0 && (
                  <div className="flex flex-wrap gap-1 mt-1">
                    <span className="text-[0.72rem] text-ink-muted">חולץ אוטומטית:</span>
                    {analysis.subject_categories.map((s) => (
                      <Badge key={s} variant="outline"
                        className="text-[0.7rem] bg-gold-wash text-gold-deep border-gold/40">
                        {s}
                      </Badge>
                    ))}
                  </div>
                )}
              </div>
              {/* Shown after a rejected commit request (e.g. duplicate number). */}
              {errorMsg && (
                <div className="rounded-lg border border-danger/40 bg-danger-bg p-3 flex items-center gap-2 text-danger text-sm">
                  <AlertCircle className="w-4 h-4 shrink-0" />
                  {errorMsg}
                </div>
              )}
              <div className="flex gap-2 justify-end pt-2">
                <Button type="button" variant="ghost"
                  onClick={() => onOpenChange(false)}
                  disabled={isProcessing}>
                  ביטול
                </Button>
                <Button type="submit" disabled={isProcessing || !decisionNumber.trim()}
                  className="bg-navy text-parchment hover:bg-navy-soft">
                  <Upload className="w-4 h-4 me-1" />
                  שמור בקורפוס
                </Button>
              </div>
            </form>
          )}
          {/* Stage: committing — background task progress */}
          {stage === "committing" && (
            <div className="rounded-lg border border-rule bg-rule-soft/40 p-4 space-y-2">
              <div className="flex items-center gap-2 text-sm text-navy">
                <Loader2 className="w-4 h-4 animate-spin" />
                <span>{progressStep || "מעבד את ההחלטה לקורפוס"}</span>
              </div>
              <Progress value={progressStep ? 60 : 30} className="h-1.5" />
            </div>
          )}
          {/* Stage: success */}
          {stage === "done" && (
            <div className="rounded-lg border border-gold/40 bg-gold-wash p-4 flex items-center gap-2 text-gold-deep text-sm">
              <CheckCircle2 className="w-4 h-4" />
              ההחלטה נוספה לקורפוס בהצלחה.
            </div>
          )}
          {/* Stage: error (failed analyze call or failed background task) */}
          {stage === "error" && (
            <div className="space-y-3">
              <div className="rounded-lg border border-danger/40 bg-danger-bg p-4 flex items-center gap-2 text-danger text-sm">
                <AlertCircle className="w-4 h-4 shrink-0" />
                {errorMsg || "שגיאה לא ידועה"}
              </div>
              <div className="flex gap-2 justify-end">
                <Button type="button" variant="ghost"
                  onClick={() => onOpenChange(false)}>
                  סגור
                </Button>
                <Button type="button" onClick={onRetry}>
                  נסה קובץ אחר
                </Button>
              </div>
            </div>
          )}
        </div>
      </SheetContent>
    </Sheet>
  );
}