feat(training): Style Studio — upload, rich corpus, lessons, curator portrait, chat
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 2m7s

Six-phase upgrade of /training from a read-only dashboard into a full
Style Studio for managing Daphna's style corpus.

- Upload Sheet on /training: file → proofread preview → commit (no more
  CLI-only `upload-training` skill).
- Rich corpus metadata: GET /api/training/corpus returns summary, outcome,
  key_principles, page_count, parties (regex), legal_citation, lessons_count.
  PATCH endpoint for chair edits. CorpusDetailDrawer with 4 tabs (details
  /content/lessons/patterns) replaces the bare table row.
- LLM metadata enrichment: style_metadata_extractor + MCP tools
  (style_corpus_enrich, style_corpus_pending_enrichment) fill summary
  /outcome/key_principles via claude_session (free, host-side).
- Per-decision lessons: new decision_lessons table + 4 REST endpoints +
  LessonsTab in drawer; hermes-curator now auto-posts findings as
  decision_lessons(source=curator).
- Curator Portrait tab: prompt rendered with link to Gitea, recent
  curator findings, style_analyzer training prompts, propose-change
  form that writes proposals to data/curator-proposals/ for manual
  chair review (no auto-mutation of the agent file).
- Style chat tab: SSE-streamed conversations with the style agent.
  New host-side pm2 service (legal-chat-service, port 8770) wraps
  claude CLI with stream-json + --resume continuation; FastAPI proxies
  via host.docker.internal. Zero API cost — uses chaim's claude.ai
  subscription. chat_conversations + chat_messages persist history.

Architecture: keeps the existing rule that claude_session only runs
on the host (not the container). The new legal-chat-service is the
canonical bridge between the container and the local CLI for the chat
feature; everything else (upload, metadata, lessons) stays within the
container's existing capabilities.

Audit script (scripts/audit_training_corpus.py) included for verifying
which corpus rows still need enrichment.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 10:06:22 +00:00
parent 0629f19d5f
commit bb0cd7c6a2
23 changed files with 4568 additions and 75 deletions

View File

@@ -0,0 +1,402 @@
"use client";
/*
* Side-drawer for inspecting + editing a single style_corpus entry.
*
* Tabs:
* - "פרטים" — show + edit the enriched metadata (decision_number, date,
* subjects, summary, outcome, key_principles, appeal_subtype). Saving
* issues a PATCH /api/training/corpus/{id} and invalidates the list.
* - "תוכן" — read-only full_text view, fetched on demand by FullTextLazy.
* NOTE: no 5K truncation / "show more" is implemented in this component.
* We never let the chair edit full_text from the UI; corrections happen
* by re-uploading via the Upload dialog.
* - "מה למדנו" — per-decision lessons, rendered by LessonsTab.
* - "דפוסים" — style_patterns. Currently corpus-wide: filtering by
* appeal_subtype is not implemented yet (see PatternsForSubtype).
*
* Why a Sheet, not a Dialog: the drawer needs to coexist with the corpus
* table so the chair can scan multiple decisions without losing context.
* Sheet (side: "left" in RTL = right edge in LTR) gives that without
* stealing the entire viewport.
*/
import { useEffect, useState } from "react";
import { Save, FileText, Tag, Calendar, BookOpen, Loader2 } from "lucide-react";
import { toast } from "sonner";
import {
Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription,
} from "@/components/ui/sheet";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { Card, CardContent } from "@/components/ui/card";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Textarea } from "@/components/ui/textarea";
import { Badge } from "@/components/ui/badge";
import { ScrollArea } from "@/components/ui/scroll-area";
import {
usePatchCorpus,
type CorpusDecision,
type CorpusDecisionPatch,
} from "@/lib/api/training";
import { LessonsTab } from "./lessons-tab";
/** Props accepted by CorpusDetailDrawer. */
type Props = {
  /** Corpus row to inspect/edit; `null` means nothing is selected and nothing is rendered. */
  decision: CorpusDecision | null;
  /** Forwarded to the Sheet's `onOpenChange`; the close button calls it with `false`. */
  onOpenChange: (open: boolean) => void;
};

/**
 * Side drawer showing one corpus decision in four tabs (details, content,
 * lessons, patterns).
 *
 * The "details" tab is an editable form backed by a local `draft`. On save,
 * only the fields that differ from the loaded `decision` are sent through
 * `usePatchCorpus`.
 *
 * The two list-valued fields (subjects, key principles) are edited as free
 * text. Their raw text is kept in separate state so that separators the chair
 * is still typing (a trailing comma, space or newline) are not stripped by the
 * parse → join round-trip on every keystroke.
 */
export function CorpusDetailDrawer({ decision, onOpenChange }: Props) {
  // Local editable state for the "details" tab. Re-seeds whenever the
  // selected decision changes so the form reflects the row the chair
  // clicked.
  const [draft, setDraft] = useState<CorpusDecisionPatch>({});
  // Raw text of the list-valued inputs, exactly as typed. The parsed arrays
  // live in `draft`; these strings are what the inputs display.
  const [subjectsText, setSubjectsText] = useState("");
  const [principlesText, setPrinciplesText] = useState("");
  const patch = usePatchCorpus();

  /* eslint-disable react-hooks/set-state-in-effect */
  useEffect(() => {
    if (!decision) {
      setDraft({});
      setSubjectsText("");
      setPrinciplesText("");
      return;
    }
    setDraft({
      decision_number: decision.decision_number,
      decision_date: decision.decision_date,
      subject_categories: decision.subject_categories,
      summary: decision.summary,
      outcome: decision.outcome,
      key_principles: decision.key_principles,
      appeal_subtype: decision.appeal_subtype,
      practice_area: decision.practice_area,
    });
    setSubjectsText((decision.subject_categories ?? []).join(", "));
    setPrinciplesText((decision.key_principles ?? []).join("\n"));
  }, [decision]);
  /* eslint-enable react-hooks/set-state-in-effect */

  const open = decision !== null;
  if (!decision) return null;

  // Diff against the originally loaded row — only PATCH fields the chair
  // actually changed, so concurrent edits to other fields stay intact.
  const diff: CorpusDecisionPatch = {};
  if (draft.decision_number !== decision.decision_number)
    diff.decision_number = draft.decision_number;
  if (draft.decision_date !== decision.decision_date)
    diff.decision_date = draft.decision_date;
  if (draft.summary !== decision.summary)
    diff.summary = draft.summary;
  if (draft.outcome !== decision.outcome)
    diff.outcome = draft.outcome;
  if (draft.appeal_subtype !== decision.appeal_subtype)
    diff.appeal_subtype = draft.appeal_subtype;
  if (draft.practice_area !== decision.practice_area)
    diff.practice_area = draft.practice_area;
  // Arrays are compared by serialized content, not by reference.
  if (
    JSON.stringify(draft.subject_categories) !==
    JSON.stringify(decision.subject_categories)
  )
    diff.subject_categories = draft.subject_categories;
  if (
    JSON.stringify(draft.key_principles) !==
    JSON.stringify(decision.key_principles)
  )
    diff.key_principles = draft.key_principles;
  const isDirty = Object.keys(diff).length > 0;

  const onSave = async () => {
    if (!isDirty) return;
    try {
      await patch.mutateAsync({ id: decision.id, patch: diff });
      toast.success("המטא-דאטה עודכן");
    } catch (e) {
      toast.error(e instanceof Error ? e.message : "כשל בשמירה");
    }
  };

  // Keep the raw text for display and store the parsed list in the draft.
  // Subjects are split on "," or "،"; blank items are dropped.
  const setSubjects = (raw: string) => {
    setSubjectsText(raw);
    setDraft((d) => ({
      ...d,
      subject_categories: raw.split(/[,،]/).map((s) => s.trim()).filter(Boolean),
    }));
  };
  // One principle per line; blank lines are dropped from the parsed list.
  const setPrinciples = (raw: string) => {
    setPrinciplesText(raw);
    setDraft((d) => ({
      ...d,
      key_principles: raw.split("\n").map((s) => s.trim()).filter(Boolean),
    }));
  };

  return (
    <Sheet open={open} onOpenChange={onOpenChange}>
      <SheetContent side="left" className="w-full sm:max-w-3xl overflow-y-auto" dir="rtl">
        <SheetHeader>
          <SheetTitle className="text-navy flex items-center gap-2">
            <BookOpen className="w-4 h-4 shrink-0" />
            {decision.legal_citation || decision.decision_number || "—"}
          </SheetTitle>
          <SheetDescription className="text-ink-muted">
            {decision.doc_title || "החלטה בקורפוס הסגנוני"}
          </SheetDescription>
        </SheetHeader>
        {/* Summary strip — fast-scan info, always visible above the tabs. */}
        <div className="px-6 mt-3 grid grid-cols-2 md:grid-cols-4 gap-3 text-[0.78rem]">
          <DataPoint icon={<Calendar className="w-3 h-3" />} label="תאריך"
            value={decision.decision_date || "—"} />
          <DataPoint icon={<FileText className="w-3 h-3" />} label="תווים"
            value={`${(decision.chars / 1000).toFixed(1)}K`} />
          <DataPoint icon={<FileText className="w-3 h-3" />} label="עמודים"
            value={decision.page_count > 0 ? String(decision.page_count) : "—"} />
          <DataPoint icon={<Tag className="w-3 h-3" />} label="תת-סוג"
            value={decision.appeal_subtype || "—"} />
        </div>
        <div className="px-6 pb-6 mt-4">
          <Tabs defaultValue="details" dir="rtl">
            <TabsList className="bg-rule-soft/60">
              <TabsTrigger value="details">פרטים</TabsTrigger>
              <TabsTrigger value="content">תוכן</TabsTrigger>
              <TabsTrigger value="lessons">מה למדנו</TabsTrigger>
              <TabsTrigger value="patterns">דפוסים</TabsTrigger>
            </TabsList>
            {/* ── Tab: editable metadata ─────────────────────────── */}
            <TabsContent value="details" className="mt-4 space-y-4">
              <div className="grid grid-cols-2 gap-3">
                <Field label="מספר ההחלטה">
                  <Input value={draft.decision_number ?? ""}
                    onChange={(e) => setDraft((d) => ({ ...d, decision_number: e.target.value }))}
                    dir="rtl" />
                </Field>
                <Field label="תאריך">
                  <Input type="date" value={draft.decision_date ?? ""}
                    onChange={(e) => setDraft((d) => ({ ...d, decision_date: e.target.value }))} />
                </Field>
              </div>
              <Field label="נושאים (מופרדים בפסיקים)">
                {/* Bound to the raw text so a trailing separator survives typing. */}
                <Input value={subjectsText}
                  onChange={(e) => setSubjects(e.target.value)} dir="rtl" />
                {decision.subject_categories.length > 0 && (
                  <div className="flex flex-wrap gap-1 mt-1">
                    {decision.subject_categories.map((s) => (
                      <Badge key={s} variant="outline"
                        className="text-[0.7rem] bg-gold-wash text-gold-deep border-gold/40">
                        {s}
                      </Badge>
                    ))}
                  </div>
                )}
              </Field>
              <div className="grid grid-cols-2 gap-3">
                <Field label="תת-סוג ערר">
                  <Input value={draft.appeal_subtype ?? ""}
                    onChange={(e) => setDraft((d) => ({ ...d, appeal_subtype: e.target.value }))}
                    placeholder="building_permit / betterment_levy / compensation_197"
                    dir="rtl" />
                </Field>
                <Field label="תחום משפט">
                  <Input value={draft.practice_area ?? ""}
                    onChange={(e) => setDraft((d) => ({ ...d, practice_area: e.target.value }))}
                    dir="rtl" />
                </Field>
              </div>
              <Field label="תקציר (summary)">
                <Textarea value={draft.summary ?? ""} rows={3}
                  onChange={(e) => setDraft((d) => ({ ...d, summary: e.target.value }))}
                  placeholder="תקציר חופשי — מי, מה, איך הוכרע"
                  dir="rtl" />
              </Field>
              <Field label="התוצאה (outcome)">
                <Textarea value={draft.outcome ?? ""} rows={2}
                  onChange={(e) => setDraft((d) => ({ ...d, outcome: e.target.value }))}
                  placeholder="קבלה / קבלה חלקית / דחייה — בקצרה"
                  dir="rtl" />
              </Field>
              <Field label="עקרונות מרכזיים (שורה לכל אחד)">
                {/* Bound to the raw text so pressing Enter can start a new line. */}
                <Textarea value={principlesText} rows={4}
                  onChange={(e) => setPrinciples(e.target.value)}
                  placeholder={"דוגמה:\nשיקול דעת מוגבל לחריגות קטנות\nריפוי פגם רק בנסיבות חריגות"}
                  dir="rtl" />
              </Field>
              {decision.parties.appellant && (
                <Card className="bg-rule-soft/40 border-rule">
                  <CardContent className="px-4 py-3 text-[0.78rem] text-ink-soft">
                    <p><strong className="text-navy">עורר/ת:</strong> {decision.parties.appellant}</p>
                    {decision.parties.respondent && (
                      <p className="mt-1"><strong className="text-navy">משיב/ה:</strong> {decision.parties.respondent}</p>
                    )}
                    <p className="mt-2 text-ink-muted text-[0.72rem]">
                      (חולץ אוטומטית מתחילת הטקסט תקן ע&quot;י עריכת ה-full_text במקור.)
                    </p>
                  </CardContent>
                </Card>
              )}
              <div className="flex items-center justify-end gap-2 pt-2 border-t border-rule">
                <Button variant="ghost" onClick={() => onOpenChange(false)}>
                  סגור
                </Button>
                <Button onClick={onSave} disabled={!isDirty || patch.isPending}
                  className="bg-navy text-parchment hover:bg-navy-soft">
                  {patch.isPending ? (
                    <Loader2 className="w-4 h-4 animate-spin me-1" />
                  ) : (
                    <Save className="w-4 h-4 me-1" />
                  )}
                  שמור שינויים
                </Button>
              </div>
            </TabsContent>
            {/* ── Tab: full_text (read-only) ─────────────────────── */}
            <TabsContent value="content" className="mt-4">
              <Card className="bg-surface border-rule">
                <CardContent className="px-4 py-3">
                  <p className="text-[0.72rem] text-ink-muted mb-2">
                    {decision.chars.toLocaleString("he-IL")} תווים · קריאה בלבד
                  </p>
                  <ScrollArea className="h-[480px] pe-2">
                    <p className="text-sm text-ink leading-relaxed whitespace-pre-wrap">
                      <FullTextLazy id={decision.id} />
                    </p>
                  </ScrollArea>
                </CardContent>
              </Card>
            </TabsContent>
            {/* ── Tab: lessons (per-decision) ────────────────────── */}
            <TabsContent value="lessons" className="mt-4">
              <LessonsTab corpusId={decision.id} />
            </TabsContent>
            {/* ── Tab: patterns scoped by appeal_subtype ─────────── */}
            <TabsContent value="patterns" className="mt-4">
              <PatternsForSubtype subtype={decision.appeal_subtype} />
            </TabsContent>
          </Tabs>
        </div>
      </SheetContent>
    </Sheet>
  );
}
// ── helpers ────────────────────────────────────────────────────────
/**
 * One cell of the summary strip: an icon, a label followed by a colon, and
 * an emphasized value that is truncated when it does not fit.
 */
function DataPoint(props: {
  icon: React.ReactNode;
  label: string;
  value: string;
}) {
  const { icon, label, value } = props;
  const valueClasses = "font-semibold text-navy tabular-nums truncate";
  return (
    <div className="flex items-center gap-1 text-ink-muted">
      {icon}
      <span>{label}:</span>
      <span className={valueClasses}>{value}</span>
    </div>
  );
}
/**
 * Form-row wrapper: renders a small Label above whatever control(s) are
 * passed as children.
 */
function Field(props: { label: string; children: React.ReactNode }) {
  const { label, children } = props;
  const caption = <Label className="text-[0.78rem]">{label}</Label>;
  return (
    <div className="space-y-1">
      {caption}
      {children}
    </div>
  );
}
/* The corpus-list endpoint deliberately doesn't return full_text (too big).
 * We fetch it on demand: when this component mounts and whenever `id`
 * changes.
 *
 * Implementation note: we don't have a dedicated /api/training/corpus/{id}
 * GET endpoint yet. As a thin stopgap we hit a planned `/full-text` shortcut
 * with a plain fetch; if the endpoint isn't deployed yet the UI just shows
 * the fallback message instead of crashing. The full-text endpoint lands with
 * the next backend deploy.
 *
 * The request is aborted on unmount / id change so a large response is not
 * downloaded for a decision the chair has already navigated away from.
 *
 * Renders: a loading label while the request is pending, a "not found"
 * label including the error message on failure, otherwise the raw text.
 */
function FullTextLazy({ id }: { id: string }) {
  const [text, setText] = useState<string>("");
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState("");
  /* eslint-disable react-hooks/set-state-in-effect */
  useEffect(() => {
    const controller = new AbortController();
    // `cancelled` guards every state update: after cleanup the abort makes
    // fetch reject, and that rejection must not surface as an error.
    let cancelled = false;
    setLoading(true);
    setError("");
    fetch(`/api/training/corpus/${encodeURIComponent(id)}/full-text`, {
      signal: controller.signal,
    })
      .then((r) => (r.ok ? r.json() : Promise.reject(new Error(`HTTP ${r.status}`))))
      .then((d: { full_text: string }) => {
        if (cancelled) return;
        setText(d.full_text || "");
      })
      .catch((e: unknown) => {
        if (cancelled) return;
        setError(e instanceof Error ? e.message : String(e));
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });
    return () => {
      cancelled = true;
      controller.abort();
    };
  }, [id]);
  /* eslint-enable react-hooks/set-state-in-effect */
  if (loading) return <span className="text-ink-muted">טוען</span>;
  if (error) return <span className="text-ink-muted">לא נמצא ({error})</span>;
  return text;
}
/**
 * Lists stored style patterns grouped by pattern type.
 *
 * Fetches /api/training/patterns once on mount and shows at most 4 groups
 * with at most 6 patterns each. `subtype` is only used for an explanatory
 * note; the list itself is not filtered by it.
 *
 * Any failure (network error, non-2xx status, malformed body) results in the
 * same "no patterns" message as an empty result.
 */
function PatternsForSubtype({ subtype }: { subtype: string }) {
  // Filtered patterns endpoint isn't built yet — we fall back to /patterns
  // and show the corpus-wide result. The result is mediocre when many subtypes
  // share patterns; better filtering ships in the metadata-enrichment iteration.
  type PatternRow = { pattern_text: string; frequency: number };
  type PatternsByType = Record<string, PatternRow[]>;

  const [data, setData] = useState<PatternsByType | null>(null);
  const [loading, setLoading] = useState(true);
  useEffect(() => {
    const controller = new AbortController();
    let cancelled = false;
    fetch("/api/training/patterns", { signal: controller.signal })
      // Reject on non-2xx so an error body is never treated as pattern data.
      .then((r) => (r.ok ? r.json() : Promise.reject(new Error(`HTTP ${r.status}`))))
      .then((d: { by_type?: PatternsByType }) => {
        // A response without `by_type` is treated as "no patterns".
        if (!cancelled) setData(d.by_type ?? {});
      })
      .catch(() => {
        if (!cancelled) setData({});
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });
    return () => {
      cancelled = true;
      controller.abort();
    };
  }, []);
  if (loading) return <p className="text-ink-muted text-sm text-center py-6">טוען</p>;
  if (!data || Object.keys(data).length === 0) {
    return <p className="text-ink-muted text-sm text-center py-6">אין דפוסים שמורים הרץ ניתוח סגנון.</p>;
  }
  return (
    <div className="space-y-3">
      {subtype && (
        <p className="text-[0.78rem] text-ink-muted">
          דפוסים בכלל הקורפוס. סינון לפי תת-סוג {subtype} ייושם בעדכון הבא.
        </p>
      )}
      {Object.entries(data).slice(0, 4).map(([type, items]) => (
        <Card key={type} className="bg-surface border-rule">
          <CardContent className="px-4 py-3">
            <h4 className="text-[0.78rem] uppercase tracking-wider text-gold-deep font-semibold mb-2">
              {type}
            </h4>
            <ul className="space-y-1 text-sm text-ink">
              {items.slice(0, 6).map((p, i) => (
                <li key={i} className="flex items-start gap-2">
                  <span className="text-[0.72rem] tabular-nums text-ink-muted shrink-0 mt-0.5">
                    ×{p.frequency}
                  </span>
                  <span>{p.pattern_text}</span>
                </li>
              ))}
            </ul>
          </CardContent>
        </Card>
      ))}
    </div>
  );
}