Files
legal-ai/web-ui/src/lib/api/training.ts
Chaim bb0cd7c6a2
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 2m7s
feat(training): Style Studio — upload, rich corpus, lessons, curator portrait, chat
Six-phase upgrade of /training from a read-only dashboard into a full
Style Studio for managing Daphna's style corpus.

- Upload Sheet on /training: file → proofread preview → commit (no more
  CLI-only `upload-training` skill).
- Rich corpus metadata: GET /api/training/corpus returns summary, outcome,
  key_principles, page_count, parties (regex), legal_citation, lessons_count.
  PATCH endpoint for chair edits. CorpusDetailDrawer with 4 tabs (details
  /content/lessons/patterns) replaces the bare table row.
- LLM metadata enrichment: style_metadata_extractor + MCP tools
  (style_corpus_enrich, style_corpus_pending_enrichment) fill summary
  /outcome/key_principles via claude_session (free, host-side).
- Per-decision lessons: new decision_lessons table + 4 REST endpoints +
  LessonsTab in drawer; hermes-curator now auto-posts findings as
  decision_lessons(source=curator).
- Curator Portrait tab: prompt rendered with link to Gitea, recent
  curator findings, style_analyzer training prompts, propose-change
  form that writes proposals to data/curator-proposals/ for manual
  chair review (no auto-mutation of the agent file).
- Style chat tab: SSE-streamed conversations with the style agent.
  New host-side pm2 service (legal-chat-service, port 8770) wraps
  claude CLI with stream-json + --resume continuation; FastAPI proxies
  via host.docker.internal. Zero API cost — uses chaim's claude.ai
  subscription. chat_conversations + chat_messages persist history.

Architecture: keeps the existing rule that claude_session only runs
on the host (not the container). The new legal-chat-service is the
canonical bridge between the container and the local CLI for the chat
feature; everything else (upload, metadata, lessons) stays within the
container's existing capabilities.

Audit script (scripts/audit_training_corpus.py) included for verifying
which corpus rows still need enrichment.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 10:06:22 +00:00

582 lines
16 KiB
TypeScript

/**
* Training / style corpus hooks.
*
* Endpoints touched (under /api/training/ unless a full path is given):
* - GET /style-report → the dashboard payload (corpus stats + anatomy
* + signature phrases + per-decision contribution)
* - GET /corpus → flat list of decisions for the corpus tab / compare tool
* - GET /compare?a=UUID&b=UUID → side-by-side comparison
* - PATCH /corpus/{id} → edit a decision's metadata
* - DELETE /corpus/{id} → remove a decision from the corpus
* - POST /api/upload → multipart file → returns sanitized filename
* (full path: this one is NOT under /api/training/)
* - POST /analyze → proofread + extract metadata for preview
* - POST /upload → commit a proofread decision to the corpus (task_id)
*
* The chat, curator and per-decision lessons hooks further down call
* additional /api/training/ endpoints; see each hook for its path.
*/
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
import { ApiError, apiRequest } from "./client";
/**
* Response shape of GET /api/training/style-report (see useStyleReport).
* Four dashboard sections, each carrying its own `headline` string.
*/
export type StyleReport = {
// Corpus-wide statistics plus a per-decision listing.
corpus: {
decision_count: number;
total_chars: number;
avg_chars: number;
// Two-element tuple; either end may be null.
// NOTE(review): presumably [earliest, latest] — confirm against the API.
date_range: [string | null, string | null];
decisions: Array<{
number: string;
date: string;
chars: number;
subjects: string[];
}>;
subject_distribution: Array<{ label: string; count: number }>;
headline: string;
};
// Per-section breakdown of the decisions.
anatomy: {
sections: Array<{
type: string;
label: string;
avg_chars: number;
// NOTE(review): units/range of pct and coverage are not visible here — confirm.
pct: number;
coverage: number;
}>;
total_coverage: number;
headline: string;
};
// Recurring phrases with their frequency and example usages.
signature_phrases: {
items: Array<{
type: string;
text: string;
context: string;
frequency: number;
examples: string[];
}>;
total_decisions: number;
top_display: string;
headline: string;
};
// Per-decision contribution data.
contribution: {
growth_curve: Array<{
decision_number: string;
date: string;
cumulative: number;
}>;
// Element shape is intentionally left untyped on the client.
decision_contributions: unknown[];
total_patterns: number;
headline: string;
};
};
/**
* One element of the array returned by GET /api/training/corpus
* (see useCorpus).
*/
export type CorpusDecision = {
id: string;
decision_number: string;
decision_date: string;
subject_categories: string[];
chars: number;
created_at: string;
// Enriched metadata (added in the corpus-page upgrade).
summary: string;
outcome: string;
key_principles: string[];
appeal_subtype: string;
practice_area: string;
page_count: number;
// The only nullable field in this row.
document_id: string | null;
doc_title: string;
parties: { appellant: string; respondent: string };
legal_citation: string;
// Computed server-side; the lesson mutations re-fetch the corpus list to keep it current.
lessons_count: number;
};
/**
* Request body for PATCH /api/training/corpus/{id} (see usePatchCorpus).
* Every field is optional, so callers may send only the fields they set.
*/
export type CorpusDecisionPatch = {
decision_number?: string;
decision_date?: string;
subject_categories?: string[];
summary?: string;
outcome?: string;
key_principles?: string[];
appeal_subtype?: string;
practice_area?: string;
};
/**
* Response shape of GET /api/training/compare?a=…&b=… (see useCompare).
*/
export type CompareResult = {
// Summaries of the two decisions being compared.
a: CompareSide;
b: CompareSide;
// NOTE(review): presumably patterns found in both decisions, only in `a`,
// and only in `b` respectively — confirm against the API.
shared: PatternEntry[];
only_a: PatternEntry[];
only_b: PatternEntry[];
};
/**
* Summary of one decision inside a CompareResult (its `a` or `b` field).
*/
export type CompareSide = {
id: string;
decision_number: string;
decision_date: string;
chars: number;
subjects: string[];
// Character count per section type.
sections: Array<{ type: string; chars: number }>;
patterns_count: number;
};
/**
* A single pattern as listed in CompareResult's `shared`, `only_a` and
* `only_b` arrays.
*/
export type PatternEntry = {
id: string;
type: string;
text: string;
context: string;
};
/**
 * React Query key factory for the training feature. Every key starts with
 * the shared `"training"` root stored in `all`.
 */
export const trainingKeys = {
  all: ["training"] as const,
  report: () => {
    return [...trainingKeys.all, "style-report"] as const;
  },
  corpus: () => {
    return [...trainingKeys.all, "corpus"] as const;
  },
  compare: (a: string, b: string) => {
    return [...trainingKeys.all, "compare", a, b] as const;
  },
};
/**
 * Loads the style dashboard payload from GET /api/training/style-report.
 * Results are treated as fresh for one minute.
 */
export function useStyleReport() {
  return useQuery({
    staleTime: 60 * 1000,
    queryKey: trainingKeys.report(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/style-report";
      return apiRequest<StyleReport>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Loads the flat list of corpus decisions from GET /api/training/corpus.
 * Results are treated as fresh for one minute.
 */
export function useCorpus() {
  return useQuery({
    staleTime: 60 * 1000,
    queryKey: trainingKeys.corpus(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/corpus";
      return apiRequest<CorpusDecision[]>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Fetches the side-by-side comparison of two corpus decisions.
 *
 * The query only runs once both ids are non-empty and differ from each
 * other. The result never goes stale on its own (`staleTime: Infinity`).
 *
 * @param a id of the first decision, or null while nothing is selected
 * @param b id of the second decision, or null while nothing is selected
 */
export function useCompare(a: string | null, b: string | null) {
  const canCompare = !(!a || !b || a === b);
  return useQuery({
    enabled: canCompare,
    staleTime: Infinity,
    queryKey: trainingKeys.compare(a ?? "", b ?? ""),
    queryFn: (ctx) => {
      // `enabled` keeps this from running until both ids are set.
      const left = encodeURIComponent(a!);
      const right = encodeURIComponent(b!);
      return apiRequest<CompareResult>(
        `/api/training/compare?a=${left}&b=${right}`,
        { signal: ctx.signal },
      );
    },
  });
}
/**
 * Mutation that removes a decision via DELETE /api/training/corpus/{id}.
 * On success the corpus list and the style report are both invalidated.
 */
export function useDeleteCorpusEntry() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (id: string) => {
      const url = `/api/training/corpus/${encodeURIComponent(id)}`;
      return apiRequest<{ deleted: boolean }>(url, { method: "DELETE" });
    },
    onSuccess: () => {
      const staleKeys = [trainingKeys.corpus(), trainingKeys.report()];
      for (const queryKey of staleKeys) {
        queryClient.invalidateQueries({ queryKey });
      }
    },
  });
}
// ── Style-agent chat ─────────────────────────────────────────────
/**
* A style-agent chat conversation, as listed by
* GET /api/training/chat/conversations and returned by the create call
* (see useChatConversations / useCreateChat).
*/
export type ChatConversation = {
id: string;
title: string;
// Corpus decision this chat is attached to, or null when there is none.
style_corpus_id: string | null;
decision_number: string;
// NOTE(review): presumably the CLI session id used to resume the chat — confirm.
claude_session_id: string | null;
message_count: number;
created_at: string;
last_message_at: string;
};
/**
* One message of a chat conversation (see useChatConversation).
*/
export type ChatMessage = {
id: string;
// Who wrote the message; only these two roles are modelled.
role: "user" | "assistant";
content: string;
created_at: string;
};
/**
* Response shape of GET /api/training/chat/health (see useChatHealth).
*/
export type ChatHealth = {
reachable: boolean;
// Optional. NOTE(review): presumably the HTTP status of the probe — confirm.
status?: number;
url: string;
// Optional error text.
error?: string;
};
/**
 * React Query keys for the style-agent chat, nested under the training
 * root key. A single conversation's key extends the list key with its id.
 */
export const chatKeys = {
  conversations: () => {
    return [...trainingKeys.all, "chat", "conversations"] as const;
  },
  conversation: (id: string) => {
    return [...trainingKeys.all, "chat", "conversations", id] as const;
  },
  health: () => {
    return [...trainingKeys.all, "chat", "health"] as const;
  },
};
/**
 * Lists chat conversations from GET /api/training/chat/conversations.
 * Results are treated as fresh for 15 seconds.
 */
export function useChatConversations() {
  return useQuery({
    staleTime: 15 * 1000,
    queryKey: chatKeys.conversations(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/chat/conversations";
      return apiRequest<ChatConversation[]>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Loads one conversation together with its messages from
 * GET /api/training/chat/conversations/{id}.
 *
 * The query is disabled while `convId` is null or empty. Results are
 * treated as fresh for 5 seconds.
 *
 * @param convId conversation id, or null when none is selected
 */
export function useChatConversation(convId: string | null) {
  const hasConversation = Boolean(convId);
  return useQuery({
    enabled: hasConversation,
    staleTime: 5 * 1000,
    queryKey: chatKeys.conversation(convId ?? ""),
    queryFn: (ctx) => {
      // `enabled` keeps this from running until convId is set.
      const url = `/api/training/chat/conversations/${encodeURIComponent(convId!)}`;
      return apiRequest<{ conversation: ChatConversation; messages: ChatMessage[] }>(
        url,
        { signal: ctx.signal },
      );
    },
  });
}
/**
 * Reads the chat backend health from GET /api/training/chat/health.
 * Failed requests are not retried. Results are treated as fresh for
 * 30 seconds.
 */
export function useChatHealth() {
  return useQuery({
    retry: false,
    staleTime: 30 * 1000,
    queryKey: chatKeys.health(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/chat/health";
      return apiRequest<ChatHealth>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Mutation that creates a conversation via
 * POST /api/training/chat/conversations. Both the title and the linked
 * corpus decision are optional. On success the conversation list is
 * invalidated.
 */
export function useCreateChat() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (body: { title?: string; style_corpus_id?: string | null }) => {
      const endpoint = "/api/training/chat/conversations";
      return apiRequest<ChatConversation>(endpoint, { method: "POST", body });
    },
    onSuccess: () => {
      const queryKey = chatKeys.conversations();
      queryClient.invalidateQueries({ queryKey });
    },
  });
}
/**
 * Mutation that deletes a conversation via
 * DELETE /api/training/chat/conversations/{id}. On success the
 * conversation list is invalidated.
 */
export function useDeleteChat() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (id: string) => {
      const url = `/api/training/chat/conversations/${encodeURIComponent(id)}`;
      return apiRequest<{ deleted: boolean }>(url, { method: "DELETE" });
    },
    onSuccess: () => {
      const queryKey = chatKeys.conversations();
      queryClient.invalidateQueries({ queryKey });
    },
  });
}
// ── Curator portrait ──────────────────────────────────────────────
/**
* Response shape of GET /api/training/curator/prompt (see useCuratorPrompt).
*/
export type CuratorPrompt = {
content: string;
filename: string;
bytes: number;
// NOTE(review): numeric timestamp; unit (seconds vs. milliseconds) is not visible here — confirm.
last_modified: number;
gitea_url: string;
};
/**
* Response shape of GET /api/training/curator/style-analyzer-prompt
* (see useStyleAnalyzerPrompts): three prompt texts plus an input limit.
*/
export type StyleAnalyzerPrompts = {
analysis_prompt: string;
single_decision_prompt: string;
synthesis_prompt: string;
max_input_tokens: number;
};
/**
* One entry of CuratorStats.recent_findings.
*/
export type CuratorFinding = {
id: string;
lesson_text: string;
category: string;
applied_to_skill: boolean;
// Number and date of the decision the finding is reported with.
decision_number: string;
decision_date: string;
created_at: string;
};
/**
* Response shape of GET /api/training/curator/stats (see useCuratorStats):
* aggregate counters plus a list of recent findings.
*/
export type CuratorStats = {
total_findings: number;
decisions_with_findings: number;
decisions_total: number;
findings_applied: number;
recent_findings: CuratorFinding[];
};
/**
* Request body for POST /api/training/curator/proposals
* (see useSubmitCuratorProposal). All three fields are required.
*/
export type CuratorProposalInput = {
title: string;
proposed_change: string;
rationale: string;
};
/**
* One element of the array returned by GET /api/training/curator/proposals
* (see useCuratorProposals).
*/
export type CuratorProposalFile = {
filename: string;
bytes: number;
// NOTE(review): numeric timestamp; unit (seconds vs. milliseconds) is not visible here — confirm.
modified_at: number;
};
/**
 * React Query keys for the curator portrait tab, nested under the
 * training root key.
 */
export const curatorKeys = {
  prompt: () => {
    return [...trainingKeys.all, "curator", "prompt"] as const;
  },
  analyzerPrompt: () => {
    return [...trainingKeys.all, "curator", "analyzer-prompt"] as const;
  },
  stats: () => {
    return [...trainingKeys.all, "curator", "stats"] as const;
  },
  proposals: () => {
    return [...trainingKeys.all, "curator", "proposals"] as const;
  },
};
/**
 * Loads the curator prompt from GET /api/training/curator/prompt.
 * Results are treated as fresh for five minutes.
 */
export function useCuratorPrompt() {
  return useQuery({
    staleTime: 5 * 60 * 1000,
    queryKey: curatorKeys.prompt(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/curator/prompt";
      return apiRequest<CuratorPrompt>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Loads the style-analyzer prompts from
 * GET /api/training/curator/style-analyzer-prompt.
 * Results are treated as fresh for five minutes.
 */
export function useStyleAnalyzerPrompts() {
  return useQuery({
    staleTime: 5 * 60 * 1000,
    queryKey: curatorKeys.analyzerPrompt(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/curator/style-analyzer-prompt";
      return apiRequest<StyleAnalyzerPrompts>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Loads curator statistics from GET /api/training/curator/stats.
 * Results are treated as fresh for one minute.
 */
export function useCuratorStats() {
  return useQuery({
    staleTime: 60 * 1000,
    queryKey: curatorKeys.stats(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/curator/stats";
      return apiRequest<CuratorStats>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Lists saved proposal files from GET /api/training/curator/proposals.
 * Results are treated as fresh for 30 seconds.
 */
export function useCuratorProposals() {
  return useQuery({
    staleTime: 30 * 1000,
    queryKey: curatorKeys.proposals(),
    queryFn: (ctx) => {
      const endpoint = "/api/training/curator/proposals";
      return apiRequest<CuratorProposalFile[]>(endpoint, { signal: ctx.signal });
    },
  });
}
/**
 * Mutation that submits a proposal via
 * POST /api/training/curator/proposals. On success the proposals list
 * is invalidated.
 */
export function useSubmitCuratorProposal() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (body: CuratorProposalInput) => {
      const endpoint = "/api/training/curator/proposals";
      return apiRequest<{ saved: boolean; filename: string }>(endpoint, {
        method: "POST",
        body,
      });
    },
    onSuccess: () => {
      const queryKey = curatorKeys.proposals();
      queryClient.invalidateQueries({ queryKey });
    },
  });
}
// ── Upload flow ──────────────────────────────────────────────────
// Three-step pipeline:
// 1. useUploadFile → POST /api/upload (multipart) → { filename }
// 2. useAnalyzeTraining → POST /api/training/analyze (form) → preview + extracted metadata
// 3. useCommitTrainingUpload → POST /api/training/upload (json) → { task_id }
// Track task_id via useProgress() from documents.ts.
/**
* Response shape of POST /api/upload (see useUploadFile).
*/
export type UploadFileResponse = {
filename: string; // sanitized, time-prefixed name in UPLOAD_DIR
original_name: string;
size: number;
};
/**
* Response shape of POST /api/training/analyze (see useAnalyzeTraining):
* the proofread text plus metadata extracted for the preview step.
*/
export type AnalyzeTrainingResponse = {
filename: string;
clean_text: string;
preview: string;
decision_number: string;
decision_date: string; // ISO YYYY-MM-DD or ""
subject_categories: string[];
// Free-form statistics; keys and values are not typed on the client.
stats: Record<string, unknown>;
chars: number;
};
/**
* Request body for POST /api/training/upload (see useCommitTrainingUpload).
*/
export type CommitTrainingRequest = {
filename: string;
decision_number: string;
decision_date: string; // YYYY-MM-DD or ""
subject_categories: string[];
// Optional title.
title?: string;
};
/** Response of POST /api/training/upload: the id of the background task to track. */
export type CommitTrainingResponse = { task_id: string };
/**
 * Builds the message for a failed form request.
 *
 * Uses the payload's `detail` field when there is one. A string detail is
 * returned as-is; any other value (for example a list of validation
 * errors) is JSON-encoded so it never renders as "[object Object]".
 * Falls back to `fallback` when the payload has no usable detail.
 */
function trainingFormFailureMessage(parsed: unknown, fallback: string): string {
  if (typeof parsed !== "object" || parsed === null || !("detail" in parsed)) {
    return fallback;
  }
  const detail = (parsed as { detail: unknown }).detail;
  if (typeof detail === "string") {
    return detail;
  }
  try {
    // JSON.stringify yields undefined for e.g. `undefined` details.
    return JSON.stringify(detail) ?? fallback;
  } catch {
    return fallback;
  }
}

/**
 * POSTs a FormData body with `fetch` and returns the parsed JSON object.
 *
 * No Content-Type header is set, so the runtime supplies the multipart
 * boundary itself.
 *
 * @param url endpoint to post to
 * @param form form fields / file to send
 * @param label short operation name used in error messages ("Upload", …)
 * @throws ApiError when the response status is not ok, or when an ok
 *   response does not carry a JSON object (so callers never receive a
 *   string or null typed as `T`)
 */
async function postTrainingForm<T>(
  url: string,
  form: FormData,
  label: string,
): Promise<T> {
  const res = await fetch(url, { method: "POST", body: form });
  const contentType = res.headers.get("content-type") ?? "";
  // Best-effort body parsing: a malformed body becomes null instead of
  // masking the HTTP status with a parse error.
  const parsed = contentType.includes("application/json")
    ? await res.json().catch(() => null)
    : await res.text().catch(() => null);
  if (!res.ok) {
    throw new ApiError(
      trainingFormFailureMessage(parsed, `${label} failed with ${res.status}`),
      res.status,
      parsed,
    );
  }
  if (typeof parsed !== "object" || parsed === null) {
    throw new ApiError(
      `${label} returned an unexpected response`,
      res.status,
      parsed,
    );
  }
  return parsed as T;
}

/**
 * Step 1 of the upload flow: sends the file as multipart field `file`
 * to POST /api/upload and resolves with the stored file's details.
 */
export function useUploadFile() {
  return useMutation({
    mutationFn: async (file: File): Promise<UploadFileResponse> => {
      const fd = new FormData();
      fd.append("file", file);
      return postTrainingForm<UploadFileResponse>("/api/upload", fd, "Upload");
    },
  });
}

/**
 * Step 2 of the upload flow: posts the stored filename as form field
 * `filename` to POST /api/training/analyze and resolves with the
 * proofread preview and extracted metadata.
 */
export function useAnalyzeTraining() {
  return useMutation({
    mutationFn: async (filename: string): Promise<AnalyzeTrainingResponse> => {
      const fd = new FormData();
      fd.append("filename", filename);
      return postTrainingForm<AnalyzeTrainingResponse>(
        "/api/training/analyze",
        fd,
        "Analyze",
      );
    },
  });
}
// ── Per-decision lessons ─────────────────────────────────────────
/**
* A lesson attached to one corpus decision, as returned by
* GET /api/training/corpus/{id}/lessons (see useCorpusLessons).
*/
export type DecisionLesson = {
id: string;
// Id of the corpus decision this lesson belongs to.
style_corpus_id: string;
lesson_text: string;
category: "style" | "structure" | "lexicon" | "tabular" | "general";
// Where the lesson came from.
source: "manual" | "curator" | "chair" | "style_analyzer";
applied_to_skill: boolean;
created_by: string;
created_at: string;
updated_at: string;
};
/**
* Request body for POST /api/training/corpus/{id}/lessons (see useAddLesson).
* Only the text is required; category and source may be omitted.
*/
export type LessonCreate = {
lesson_text: string;
category?: DecisionLesson["category"];
source?: DecisionLesson["source"];
};
/**
* Request body for PATCH /api/training/lessons/{id} (see usePatchLesson).
* Every field is optional; `source` is not part of the patch shape.
*/
export type LessonPatch = {
lesson_text?: string;
category?: DecisionLesson["category"];
applied_to_skill?: boolean;
};
/**
 * React Query keys for per-decision lessons, nested under the training
 * root key and scoped by corpus decision id.
 */
export const lessonsKeys = {
  forCorpus: (corpusId: string) => {
    return [...trainingKeys.all, "lessons", corpusId] as const;
  },
};
/**
 * Loads the lessons of one corpus decision from
 * GET /api/training/corpus/{id}/lessons.
 *
 * The query is disabled while `corpusId` is null or empty. Results are
 * treated as fresh for 30 seconds.
 *
 * @param corpusId id of the corpus decision, or null when none is open
 */
export function useCorpusLessons(corpusId: string | null) {
  const hasCorpus = Boolean(corpusId);
  return useQuery({
    enabled: hasCorpus,
    staleTime: 30 * 1000,
    queryKey: lessonsKeys.forCorpus(corpusId ?? ""),
    queryFn: (ctx) => {
      // `enabled` keeps this from running until corpusId is set.
      const url = `/api/training/corpus/${encodeURIComponent(corpusId!)}/lessons`;
      return apiRequest<DecisionLesson[]>(url, { signal: ctx.signal });
    },
  });
}
/**
 * Mutation that adds a lesson to a corpus decision via
 * POST /api/training/corpus/{id}/lessons.
 *
 * @param corpusId id of the corpus decision the lesson is attached to
 */
export function useAddLesson(corpusId: string) {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (body: LessonCreate) => {
      const url = `/api/training/corpus/${encodeURIComponent(corpusId)}/lessons`;
      return apiRequest<DecisionLesson>(url, { method: "POST", body });
    },
    onSuccess: () => {
      // The corpus list is refreshed as well: each row's lessons_count is
      // computed server-side, so the badge would otherwise stay stale.
      const staleKeys = [lessonsKeys.forCorpus(corpusId), trainingKeys.corpus()];
      for (const queryKey of staleKeys) {
        queryClient.invalidateQueries({ queryKey });
      }
    },
  });
}
/**
 * Mutation that edits a lesson via PATCH /api/training/lessons/{id}.
 * On success only the lessons list of `corpusId` is invalidated.
 *
 * @param corpusId id of the corpus decision whose lessons list to refresh
 */
export function usePatchLesson(corpusId: string) {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (vars: { id: string; patch: LessonPatch }) => {
      const url = `/api/training/lessons/${encodeURIComponent(vars.id)}`;
      return apiRequest<{ updated: boolean }>(url, {
        method: "PATCH",
        body: vars.patch,
      });
    },
    onSuccess: () => {
      const queryKey = lessonsKeys.forCorpus(corpusId);
      queryClient.invalidateQueries({ queryKey });
    },
  });
}
/**
 * Mutation that removes a lesson via DELETE /api/training/lessons/{id}.
 * On success the lessons list of `corpusId` and the corpus list are
 * both invalidated.
 *
 * @param corpusId id of the corpus decision whose lessons list to refresh
 */
export function useDeleteLesson(corpusId: string) {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (id: string) => {
      const url = `/api/training/lessons/${encodeURIComponent(id)}`;
      return apiRequest<{ deleted: boolean }>(url, { method: "DELETE" });
    },
    onSuccess: () => {
      const staleKeys = [lessonsKeys.forCorpus(corpusId), trainingKeys.corpus()];
      for (const queryKey of staleKeys) {
        queryClient.invalidateQueries({ queryKey });
      }
    },
  });
}
/**
 * Mutation that edits a corpus decision's metadata via
 * PATCH /api/training/corpus/{id}. On success the corpus list and the
 * style report are both invalidated.
 */
export function usePatchCorpus() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (vars: { id: string; patch: CorpusDecisionPatch }) => {
      const url = `/api/training/corpus/${encodeURIComponent(vars.id)}`;
      return apiRequest<{ updated: boolean; id: string }>(url, {
        method: "PATCH",
        body: vars.patch,
      });
    },
    onSuccess: () => {
      const staleKeys = [trainingKeys.corpus(), trainingKeys.report()];
      for (const queryKey of staleKeys) {
        queryClient.invalidateQueries({ queryKey });
      }
    },
  });
}
/**
 * Step 3 of the upload flow: commits a proofread decision via
 * POST /api/training/upload and resolves with the background task id.
 *
 * This hook deliberately has no onSuccess invalidation: the new row only
 * exists once the background task has finished. The dialog follows
 * useProgress(task_id) and invalidates trainingKeys when the status
 * becomes "completed".
 */
export function useCommitTrainingUpload() {
  return useMutation({
    mutationFn: (body: CommitTrainingRequest) => {
      const endpoint = "/api/training/upload";
      return apiRequest<CommitTrainingResponse>(endpoint, {
        method: "POST",
        body,
      });
    },
  });
}