feat(training): Style Studio — upload, rich corpus, lessons, curator portrait, chat

Six-phase upgrade of /training from a read-only dashboard into a full Style Studio for managing Daphna's style corpus. - Upload Sheet on /training: file → proofread preview → commit (no more CLI-only `upload-training` skill). - Rich corpus metadata: GET /api/training/corpus returns summary, outcome, key_principles, page_count, parties (regex), legal_citation, lessons_count. PATCH endpoint for chair edits. CorpusDetailDrawer with 4 tabs (details/content/lessons/patterns) replaces the bare table row. - LLM metadata enrichment: style_metadata_extractor + MCP tools (style_corpus_enrich, style_corpus_pending_enrichment) fill summary/outcome/key_principles via claude_session (free, host-side). - Per-decision lessons: new decision_lessons table + 4 REST endpoints + LessonsTab in drawer; hermes-curator now auto-posts findings as decision_lessons(source=curator). - Curator Portrait tab: prompt rendered with link to Gitea, recent curator findings, style_analyzer training prompts, propose-change form that writes proposals to data/curator-proposals/ for manual chair review (no auto-mutation of the agent file). - Style chat tab: SSE-streamed conversations with the style agent. New host-side pm2 service (legal-chat-service, port 8770) wraps claude CLI with stream-json + --resume continuation; FastAPI proxies via host.docker.internal. Zero API cost — uses chaim's claude.ai subscription. chat_conversations + chat_messages persist history. Architecture: keeps the existing rule that claude_session only runs on the host (not the container). The new legal-chat-service is the canonical bridge between the container and the local CLI for the chat feature; everything else (upload, metadata, lessons) stays within the container's existing capabilities. Audit script (scripts/audit_training_corpus.py) included for verifying which corpus rows still need enrichment. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 10:06:22 +00:00
parent 0629f19d5f
commit bb0cd7c6a2
23 changed files with 4568 additions and 75 deletions
--- a/web-ui/src/lib/api/training.ts
+++ b/web-ui/src/lib/api/training.ts
@@ -7,10 +7,13 @@
 *   - GET /corpus → flat list of decisions for the corpus tab / compare tool
 *   - GET /compare?a=UUID&b=UUID → side-by-side comparison
 *   - DELETE /corpus/{id} → remove a decision from the corpus
+ *   - POST /api/upload → multipart file → returns sanitized filename
+ *   - POST /analyze → proofread + extract metadata for preview
+ *   - POST /upload → commit a proofread decision to the corpus (task_id)
 */

 import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
-import { apiRequest } from "./client";
+import { ApiError, apiRequest } from "./client";

 export type StyleReport = {
  corpus: {
@@ -69,6 +72,29 @@ export type CorpusDecision = {
  subject_categories: string[];
  chars: number;
  created_at: string;
+  // Enriched metadata (added in the corpus-page upgrade).
+  summary: string;
+  outcome: string;
+  key_principles: string[];
+  appeal_subtype: string;
+  practice_area: string;
+  page_count: number;
+  document_id: string | null;
+  doc_title: string;
+  parties: { appellant: string; respondent: string };
+  legal_citation: string;
+  lessons_count: number;
+};
+
+/**
+ * Request body for PATCH /api/training/corpus/{id} (sent by usePatchCorpus).
+ * Every field is optional, so a caller can send only the fields it edits.
+ * NOTE(review): how the server treats omitted fields is not visible here —
+ * presumably they are left unchanged; confirm against the endpoint.
+ */
+export type CorpusDecisionPatch = {
+  decision_number?: string;
+  decision_date?: string;
+  subject_categories?: string[];
+  summary?: string;
+  outcome?: string;
+  key_principles?: string[];
+  appeal_subtype?: string;
+  practice_area?: string;
 };

 export type CompareResult = {
@@ -149,3 +175,407 @@ export function useDeleteCorpusEntry() {
    },
  });
 }
+
+// ── Style-agent chat ─────────────────────────────────────────────
+
+/**
+ * A style-agent chat conversation, as returned by the
+ * /api/training/chat/conversations endpoints.
+ */
+export type ChatConversation = {
+  id: string;
+  title: string;
+  // Nullable link to a corpus row — presumably null for a chat that is not
+  // tied to a specific decision; confirm against the API.
+  style_corpus_id: string | null;
+  decision_number: string;
+  // NOTE(review): looks like the CLI session used to resume the chat; null
+  // presumably means no session exists yet — confirm with the chat service.
+  claude_session_id: string | null;
+  message_count: number;
+  // Timestamps arrive as strings — presumably ISO 8601; TODO confirm.
+  created_at: string;
+  last_message_at: string;
+};
+
+/**
+ * One message of a chat conversation; returned in the `messages` array of
+ * the single-conversation endpoint (see useChatConversation).
+ */
+export type ChatMessage = {
+  id: string;
+  // Author of the message: the human user or the assistant.
+  role: "user" | "assistant";
+  content: string;
+  created_at: string;
+};
+
+/**
+ * Response of GET /api/training/chat/health (see useChatHealth).
+ * NOTE(review): `status` and `error` are optional — presumably `status` is
+ * the upstream HTTP status and `error` is set when the service is not
+ * reachable; confirm against the backend.
+ */
+export type ChatHealth = {
+  reachable: boolean;
+  status?: number;
+  url: string;
+  error?: string;
+};
+
+/**
+ * React Query cache keys for the chat feature. Every key is prefixed with
+ * `trainingKeys.all`, followed by the "chat" segment.
+ */
+export const chatKeys = {
+  /** Key for the conversation list. */
+  conversations() {
+    return [...trainingKeys.all, "chat", "conversations"] as const;
+  },
+  /** Key for a single conversation; extends the list key with the id. */
+  conversation(id: string) {
+    return [...trainingKeys.all, "chat", "conversations", id] as const;
+  },
+  /** Key for the chat-service health probe. */
+  health() {
+    return [...trainingKeys.all, "chat", "health"] as const;
+  },
+};
+
+/**
+ * Query hook that loads the list of chat conversations from
+ * GET /api/training/chat/conversations. Results are treated as fresh for
+ * 15 seconds; the query's abort signal is forwarded to the request.
+ */
+export function useChatConversations() {
+  const listUrl = "/api/training/chat/conversations";
+  return useQuery({
+    staleTime: 15_000,
+    queryKey: chatKeys.conversations(),
+    queryFn: (ctx) =>
+      apiRequest<ChatConversation[]>(listUrl, { signal: ctx.signal }),
+  });
+}
+
+/**
+ * Query hook that loads one conversation together with its messages.
+ *
+ * @param convId Conversation id, or null when nothing is selected. The
+ *   query is disabled while the id is null/empty.
+ */
+export function useChatConversation(convId: string | null) {
+  type ConversationPayload = {
+    conversation: ChatConversation;
+    messages: ChatMessage[];
+  };
+  const hasId = Boolean(convId);
+  return useQuery({
+    enabled: hasId,
+    staleTime: 5_000,
+    // An empty string stands in for the id while no conversation is selected.
+    queryKey: chatKeys.conversation(convId ?? ""),
+    queryFn: (ctx) => {
+      // Only reached when `enabled` is true, so convId is non-null here.
+      const path = `/api/training/chat/conversations/${encodeURIComponent(convId!)}`;
+      return apiRequest<ConversationPayload>(path, { signal: ctx.signal });
+    },
+  });
+}
+
+/**
+ * Query hook that probes GET /api/training/chat/health. Retries are turned
+ * off, so a failed probe surfaces immediately; results stay fresh for 30 s.
+ */
+export function useChatHealth() {
+  const healthUrl = "/api/training/chat/health";
+  return useQuery({
+    retry: false,
+    staleTime: 30_000,
+    queryKey: chatKeys.health(),
+    queryFn: (ctx) => apiRequest<ChatHealth>(healthUrl, { signal: ctx.signal }),
+  });
+}
+
+/**
+ * Mutation hook that creates a chat conversation via
+ * POST /api/training/chat/conversations and, on success, invalidates the
+ * cached conversation list.
+ */
+export function useCreateChat() {
+  const queryClient = useQueryClient();
+
+  const createConversation = (body: {
+    title?: string;
+    style_corpus_id?: string | null;
+  }) =>
+    apiRequest<ChatConversation>("/api/training/chat/conversations", {
+      method: "POST",
+      body,
+    });
+
+  return useMutation({
+    mutationFn: createConversation,
+    onSuccess: () => {
+      // Fire-and-forget: the mutation does not wait for the refetch.
+      void queryClient.invalidateQueries({ queryKey: chatKeys.conversations() });
+    },
+  });
+}
+
+/**
+ * Mutation hook that deletes a chat conversation by id via
+ * DELETE /api/training/chat/conversations/{id} and, on success, invalidates
+ * the cached conversation list.
+ */
+export function useDeleteChat() {
+  const queryClient = useQueryClient();
+
+  const deleteConversation = (id: string) => {
+    const path = `/api/training/chat/conversations/${encodeURIComponent(id)}`;
+    return apiRequest<{ deleted: boolean }>(path, { method: "DELETE" });
+  };
+
+  return useMutation({
+    mutationFn: deleteConversation,
+    onSuccess: () => {
+      void queryClient.invalidateQueries({ queryKey: chatKeys.conversations() });
+    },
+  });
+}
+
+// ── Curator portrait ──────────────────────────────────────────────
+
+/**
+ * Response of GET /api/training/curator/prompt (see useCuratorPrompt):
+ * the prompt text plus metadata about the file it comes from.
+ */
+export type CuratorPrompt = {
+  content: string;
+  filename: string;
+  bytes: number;
+  // Numeric timestamp — presumably epoch seconds; TODO confirm the unit.
+  last_modified: number;
+  gitea_url: string;
+};
+
+/**
+ * Response of GET /api/training/curator/style-analyzer-prompt
+ * (see useStyleAnalyzerPrompts).
+ */
+export type StyleAnalyzerPrompts = {
+  analysis_prompt: string;
+  single_decision_prompt: string;
+  synthesis_prompt: string;
+  max_input_tokens: number;
+};
+
+/** One curator finding, as listed in CuratorStats.recent_findings. */
+export type CuratorFinding = {
+  id: string;
+  lesson_text: string;
+  category: string;
+  applied_to_skill: boolean;
+  decision_number: string;
+  decision_date: string;
+  created_at: string;
+};
+
+/**
+ * Response of GET /api/training/curator/stats (see useCuratorStats):
+ * aggregate counters plus a list of recent findings.
+ */
+export type CuratorStats = {
+  total_findings: number;
+  decisions_with_findings: number;
+  decisions_total: number;
+  findings_applied: number;
+  recent_findings: CuratorFinding[];
+};
+
+/**
+ * Request body for POST /api/training/curator/proposals
+ * (see useSubmitCuratorProposal).
+ */
+export type CuratorProposalInput = {
+  title: string;
+  proposed_change: string;
+  rationale: string;
+};
+
+/**
+ * One entry of the list returned by GET /api/training/curator/proposals
+ * (see useCuratorProposals).
+ */
+export type CuratorProposalFile = {
+  filename: string;
+  bytes: number;
+  // Numeric timestamp — presumably epoch seconds; TODO confirm the unit.
+  modified_at: number;
+};
+
+/**
+ * React Query cache keys for the curator tab. Every key is prefixed with
+ * `trainingKeys.all`, followed by the "curator" segment.
+ */
+export const curatorKeys = {
+  /** Key for the curator prompt. */
+  prompt() {
+    return [...trainingKeys.all, "curator", "prompt"] as const;
+  },
+  /** Key for the style-analyzer prompts. */
+  analyzerPrompt() {
+    return [...trainingKeys.all, "curator", "analyzer-prompt"] as const;
+  },
+  /** Key for the curator statistics. */
+  stats() {
+    return [...trainingKeys.all, "curator", "stats"] as const;
+  },
+  /** Key for the list of saved proposals. */
+  proposals() {
+    return [...trainingKeys.all, "curator", "proposals"] as const;
+  },
+};
+
+/**
+ * Query hook that loads the curator prompt from
+ * GET /api/training/curator/prompt. Results stay fresh for five minutes.
+ */
+export function useCuratorPrompt() {
+  const FIVE_MINUTES_MS = 5 * 60_000;
+  return useQuery({
+    staleTime: FIVE_MINUTES_MS,
+    queryKey: curatorKeys.prompt(),
+    queryFn: (ctx) =>
+      apiRequest<CuratorPrompt>("/api/training/curator/prompt", {
+        signal: ctx.signal,
+      }),
+  });
+}
+
+/**
+ * Query hook that loads the style-analyzer prompts from
+ * GET /api/training/curator/style-analyzer-prompt. Results stay fresh for
+ * five minutes.
+ */
+export function useStyleAnalyzerPrompts() {
+  const FIVE_MINUTES_MS = 5 * 60_000;
+  const promptsUrl = "/api/training/curator/style-analyzer-prompt";
+  return useQuery({
+    staleTime: FIVE_MINUTES_MS,
+    queryKey: curatorKeys.analyzerPrompt(),
+    queryFn: (ctx) =>
+      apiRequest<StyleAnalyzerPrompts>(promptsUrl, { signal: ctx.signal }),
+  });
+}
+
+/**
+ * Query hook that loads curator statistics from
+ * GET /api/training/curator/stats. Results stay fresh for one minute.
+ */
+export function useCuratorStats() {
+  const statsUrl = "/api/training/curator/stats";
+  return useQuery({
+    staleTime: 60_000,
+    queryKey: curatorKeys.stats(),
+    queryFn: (ctx) => apiRequest<CuratorStats>(statsUrl, { signal: ctx.signal }),
+  });
+}
+
+/**
+ * Query hook that lists saved curator proposals from
+ * GET /api/training/curator/proposals. Results stay fresh for 30 seconds.
+ */
+export function useCuratorProposals() {
+  const proposalsUrl = "/api/training/curator/proposals";
+  return useQuery({
+    staleTime: 30_000,
+    queryKey: curatorKeys.proposals(),
+    queryFn: (ctx) =>
+      apiRequest<CuratorProposalFile[]>(proposalsUrl, { signal: ctx.signal }),
+  });
+}
+
+/**
+ * Mutation hook that submits a curator proposal via
+ * POST /api/training/curator/proposals and, on success, invalidates the
+ * cached proposal list.
+ */
+export function useSubmitCuratorProposal() {
+  const queryClient = useQueryClient();
+
+  const submitProposal = (body: CuratorProposalInput) =>
+    apiRequest<{ saved: boolean; filename: string }>(
+      "/api/training/curator/proposals",
+      { method: "POST", body },
+    );
+
+  return useMutation({
+    mutationFn: submitProposal,
+    onSuccess: () => {
+      void queryClient.invalidateQueries({ queryKey: curatorKeys.proposals() });
+    },
+  });
+}
+
+// ── Upload flow ──────────────────────────────────────────────────
+// Three-step pipeline:
+//   1. useUploadFile           → POST /api/upload (multipart)        → { filename }
+//   2. useAnalyzeTraining      → POST /api/training/analyze (form)   → preview + extracted metadata
+//   3. useCommitTrainingUpload → POST /api/training/upload (json)    → { task_id }
+//      Track task_id via useProgress() from documents.ts.
+
+/** Response of POST /api/upload (see useUploadFile). */
+export type UploadFileResponse = {
+  filename: string;       // sanitized, time-prefixed name in UPLOAD_DIR
+  original_name: string;
+  size: number;
+};
+
+/**
+ * Response of POST /api/training/analyze (see useAnalyzeTraining): the
+ * text and metadata shown to the user before committing.
+ */
+export type AnalyzeTrainingResponse = {
+  filename: string;
+  clean_text: string;
+  preview: string;
+  decision_number: string;
+  decision_date: string;        // ISO YYYY-MM-DD or ""
+  subject_categories: string[];
+  // Free-form statistics; the shape is not constrained on the client side.
+  stats: Record<string, unknown>;
+  chars: number;
+};
+
+/** Request body for POST /api/training/upload (see useCommitTrainingUpload). */
+export type CommitTrainingRequest = {
+  filename: string;
+  decision_number: string;
+  decision_date: string;        // YYYY-MM-DD or ""
+  subject_categories: string[];
+  title?: string;
+};
+
+/** Response of POST /api/training/upload: the id of the started task. */
+export type CommitTrainingResponse = { task_id: string };
+
+/**
+ * Mutation hook for step 1 of the upload flow: sends the file as
+ * multipart form data to POST /api/upload.
+ *
+ * Uses `fetch` directly so the browser sets the multipart boundary itself.
+ *
+ * @returns A mutation whose variable is the `File` to upload and whose
+ *   result is the parsed `UploadFileResponse`.
+ * @throws ApiError when the response status is not OK, or when an OK
+ *   response does not carry a JSON object body.
+ */
+export function useUploadFile() {
+  return useMutation({
+    mutationFn: async (file: File): Promise<UploadFileResponse> => {
+      const form = new FormData();
+      form.append("file", file);
+      const res = await fetch("/api/upload", { method: "POST", body: form });
+
+      // Parse leniently: a broken body must not mask the HTTP status.
+      const contentType = res.headers.get("content-type") ?? "";
+      const parsed: unknown = contentType.includes("application/json")
+        ? await res.json().catch(() => null)
+        : await res.text().catch(() => null);
+      const isJsonObject = typeof parsed === "object" && parsed !== null;
+
+      if (!res.ok) {
+        const detail =
+          isJsonObject && "detail" in (parsed as object)
+            ? (parsed as { detail: unknown }).detail
+            : undefined;
+        let message = `Upload failed with ${res.status}`;
+        if (typeof detail === "string") {
+          message = detail;
+        } else if (detail != null) {
+          // Structured details (e.g. a list of validation errors) would
+          // stringify to "[object Object]"; serialize them instead.
+          message = JSON.stringify(detail);
+        }
+        throw new ApiError(message, res.status, parsed);
+      }
+
+      // A 2xx reply without a JSON object body cannot be an
+      // UploadFileResponse; fail loudly rather than return a string or null.
+      if (!isJsonObject) {
+        throw new ApiError(
+          "Upload succeeded but the response was not JSON",
+          res.status,
+          parsed,
+        );
+      }
+      return parsed as UploadFileResponse;
+    },
+  });
+}
+
+/**
+ * Mutation hook for step 2 of the upload flow: posts the uploaded file's
+ * name as form data to POST /api/training/analyze and returns the preview
+ * and extracted metadata.
+ *
+ * @returns A mutation whose variable is the `filename` to analyze and
+ *   whose result is the parsed `AnalyzeTrainingResponse`.
+ * @throws ApiError when the response status is not OK, or when an OK
+ *   response does not carry a JSON object body.
+ */
+export function useAnalyzeTraining() {
+  return useMutation({
+    mutationFn: async (filename: string): Promise<AnalyzeTrainingResponse> => {
+      const form = new FormData();
+      form.append("filename", filename);
+      const res = await fetch("/api/training/analyze", {
+        method: "POST",
+        body: form,
+      });
+
+      // Parse leniently: a broken body must not mask the HTTP status.
+      const contentType = res.headers.get("content-type") ?? "";
+      const parsed: unknown = contentType.includes("application/json")
+        ? await res.json().catch(() => null)
+        : await res.text().catch(() => null);
+      const isJsonObject = typeof parsed === "object" && parsed !== null;
+
+      if (!res.ok) {
+        const detail =
+          isJsonObject && "detail" in (parsed as object)
+            ? (parsed as { detail: unknown }).detail
+            : undefined;
+        let message = `Analyze failed with ${res.status}`;
+        if (typeof detail === "string") {
+          message = detail;
+        } else if (detail != null) {
+          // Structured details (e.g. a list of validation errors) would
+          // stringify to "[object Object]"; serialize them instead.
+          message = JSON.stringify(detail);
+        }
+        throw new ApiError(message, res.status, parsed);
+      }
+
+      // A 2xx reply without a JSON object body cannot be an
+      // AnalyzeTrainingResponse; fail loudly rather than return a string
+      // or null.
+      if (!isJsonObject) {
+        throw new ApiError(
+          "Analyze succeeded but the response was not JSON",
+          res.status,
+          parsed,
+        );
+      }
+      return parsed as AnalyzeTrainingResponse;
+    },
+  });
+}
+
+// ── Per-decision lessons ─────────────────────────────────────────
+
+/**
+ * A lesson attached to one corpus decision, as returned by
+ * /api/training/corpus/{id}/lessons.
+ */
+export type DecisionLesson = {
+  id: string;
+  // Id of the corpus row this lesson belongs to.
+  style_corpus_id: string;
+  lesson_text: string;
+  category: "style" | "structure" | "lexicon" | "tabular" | "general";
+  // Where the lesson came from.
+  source: "manual" | "curator" | "chair" | "style_analyzer";
+  applied_to_skill: boolean;
+  created_by: string;
+  created_at: string;
+  updated_at: string;
+};
+
+/**
+ * Request body for creating a lesson (see useAddLesson). Only the text is
+ * required; category and source reuse the DecisionLesson unions.
+ */
+export type LessonCreate = {
+  lesson_text: string;
+  category?: DecisionLesson["category"];
+  source?: DecisionLesson["source"];
+};
+
+/**
+ * Request body for PATCH /api/training/lessons/{id} (see usePatchLesson).
+ * All fields are optional; `source` is not part of the patch shape.
+ */
+export type LessonPatch = {
+  lesson_text?: string;
+  category?: DecisionLesson["category"];
+  applied_to_skill?: boolean;
+};
+
+/** React Query cache keys for per-decision lessons. */
+export const lessonsKeys = {
+  /** Key for the lessons of one corpus row, nested under trainingKeys.all. */
+  forCorpus(corpusId: string) {
+    return [...trainingKeys.all, "lessons", corpusId] as const;
+  },
+};
+
+/**
+ * Query hook that loads the lessons of one corpus decision.
+ *
+ * @param corpusId Corpus row id, or null when nothing is selected. The
+ *   query is disabled while the id is null/empty.
+ */
+export function useCorpusLessons(corpusId: string | null) {
+  const hasId = Boolean(corpusId);
+  return useQuery({
+    enabled: hasId,
+    staleTime: 30_000,
+    // An empty string stands in for the id while no row is selected.
+    queryKey: lessonsKeys.forCorpus(corpusId ?? ""),
+    queryFn: (ctx) => {
+      // Only reached when `enabled` is true, so corpusId is non-null here.
+      const path = `/api/training/corpus/${encodeURIComponent(corpusId!)}/lessons`;
+      return apiRequest<DecisionLesson[]>(path, { signal: ctx.signal });
+    },
+  });
+}
+
+/**
+ * Mutation hook that adds a lesson to a corpus decision via
+ * POST /api/training/corpus/{corpusId}/lessons.
+ *
+ * @param corpusId Id of the corpus row the lesson is attached to.
+ */
+export function useAddLesson(corpusId: string) {
+  const queryClient = useQueryClient();
+
+  const createLesson = (body: LessonCreate) => {
+    const path = `/api/training/corpus/${encodeURIComponent(corpusId)}/lessons`;
+    return apiRequest<DecisionLesson>(path, { method: "POST", body });
+  };
+
+  return useMutation({
+    mutationFn: createLesson,
+    onSuccess: () => {
+      void queryClient.invalidateQueries({
+        queryKey: lessonsKeys.forCorpus(corpusId),
+      });
+      // The corpus list carries a server-computed lessons_count, so it is
+      // refreshed as well; otherwise the badge would keep the old number.
+      void queryClient.invalidateQueries({ queryKey: trainingKeys.corpus() });
+    },
+  });
+}
+
+/**
+ * Mutation hook that edits a lesson via PATCH /api/training/lessons/{id}
+ * and, on success, invalidates the lessons of the given corpus row.
+ *
+ * @param corpusId Id of the corpus row whose lesson cache is refreshed.
+ */
+export function usePatchLesson(corpusId: string) {
+  const queryClient = useQueryClient();
+
+  const updateLesson = ({ id, patch }: { id: string; patch: LessonPatch }) => {
+    const path = `/api/training/lessons/${encodeURIComponent(id)}`;
+    return apiRequest<{ updated: boolean }>(path, {
+      method: "PATCH",
+      body: patch,
+    });
+  };
+
+  return useMutation({
+    mutationFn: updateLesson,
+    onSuccess: () => {
+      void queryClient.invalidateQueries({
+        queryKey: lessonsKeys.forCorpus(corpusId),
+      });
+    },
+  });
+}
+
+/**
+ * Mutation hook that deletes a lesson via
+ * DELETE /api/training/lessons/{id}. On success it invalidates both the
+ * lessons of the given corpus row and the corpus list.
+ *
+ * @param corpusId Id of the corpus row whose lesson cache is refreshed.
+ */
+export function useDeleteLesson(corpusId: string) {
+  const queryClient = useQueryClient();
+
+  const removeLesson = (id: string) => {
+    const path = `/api/training/lessons/${encodeURIComponent(id)}`;
+    return apiRequest<{ deleted: boolean }>(path, { method: "DELETE" });
+  };
+
+  return useMutation({
+    mutationFn: removeLesson,
+    onSuccess: () => {
+      void queryClient.invalidateQueries({
+        queryKey: lessonsKeys.forCorpus(corpusId),
+      });
+      void queryClient.invalidateQueries({ queryKey: trainingKeys.corpus() });
+    },
+  });
+}
+
+/**
+ * Mutation hook that edits a corpus decision via
+ * PATCH /api/training/corpus/{id}. On success it invalidates both the
+ * corpus list and the style report queries.
+ */
+export function usePatchCorpus() {
+  const queryClient = useQueryClient();
+
+  const updateCorpusRow = ({
+    id,
+    patch,
+  }: {
+    id: string;
+    patch: CorpusDecisionPatch;
+  }) => {
+    const path = `/api/training/corpus/${encodeURIComponent(id)}`;
+    return apiRequest<{ updated: boolean; id: string }>(path, {
+      method: "PATCH",
+      body: patch,
+    });
+  };
+
+  return useMutation({
+    mutationFn: updateCorpusRow,
+    onSuccess: () => {
+      void queryClient.invalidateQueries({ queryKey: trainingKeys.corpus() });
+      void queryClient.invalidateQueries({ queryKey: trainingKeys.report() });
+    },
+  });
+}
+
+/**
+ * Mutation hook for step 3 of the upload flow: commits a proofread
+ * decision via POST /api/training/upload and resolves with the task id.
+ *
+ * Deliberately has no onSuccess invalidation: the new row only exists
+ * once the background task has finished. The dialog watches
+ * useProgress(task_id) and invalidates trainingKeys when
+ * status === "completed".
+ */
+export function useCommitTrainingUpload() {
+  const commitUpload = (body: CommitTrainingRequest) =>
+    apiRequest<CommitTrainingResponse>("/api/training/upload", {
+      method: "POST",
+      body,
+    });
+
+  return useMutation({ mutationFn: commitUpload });
+}