The chair wanted an independent recommendation beside each tag, to reconsider
his own judgments. Adds a NON-ground-truth AI second-opinion:

- schema: halacha_goldset.ai_is_holding / ai_correct_type / ai_rationale /
  ai_generated_at (additive).
- db.goldset_set_ai_recommendation + goldset_list now returns the ai_* fields.
- scripts/goldset_ai_recommend.py — local claude_session judges is_holding +
  type + a one-line rationale per item, INDEPENDENTLY (own legal rubric).
  Independent of the rule-based validators #81.8 measures → no circularity.
  Never auto-applied; QA aid only.
- web-ui: each card shows "🤖 המלצת AI: הלכה/לא · type" + rationale and an
  agreement/disagreement chip vs the human tag (amber on disagree); a
  "⚠ אי-הסכמות AI (N)" filter to review only the conflicts.

Methodology note kept explicit: the human stays the ground truth; the AI is a
prompt to reconsider, not to copy.

Verified: tsc --noEmit 0; generator stores recs and flags disagreements with
existing human tags.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
112 lines
3.2 KiB
TypeScript
112 lines
3.2 KiB
TypeScript
/**
 * Gold-set tagging API (#81.7 / #81.8).
 *
 * The chair/Dafna manually labels a stratified sample of halachot
 * (is_holding / correct_type / quote_complete). Those human labels are the
 * ground truth used to measure the extraction validators and recalibrate the
 * auto-approve threshold. Endpoints under /api/goldset.
 */
|
|
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
|
|
import { apiRequest } from "./client";
|
|
|
|
/**
 * One row of the gold set as returned by the list endpoint: the human tags,
 * the halacha content together with the machine's own labels, and the AI
 * second-opinion fields.
 */
export type GoldsetItem = {
  /** Identifier of the gold-set row. */
  id: string;
  /** Identifier of the halacha this row refers to. */
  halacha_id: string;

  // ---- Human tags (null until tagged) ----

  /** Human judgment: is this a holding? */
  is_holding: boolean | null;
  /** Human-assigned type. */
  correct_type: string;
  /** Human judgment: is the supporting quote complete? */
  quote_complete: boolean | null;
  /** Who tagged the item. */
  tagged_by: string;
  /** When the item was tagged; null if it has not been tagged. */
  tagged_at: string | null;

  // ---- Halacha content + the machine's own labels ----

  rule_statement: string;
  supporting_quote: string;
  reasoning_summary: string;
  /** Type assigned by the machine. */
  rule_type: string;
  /** Machine confidence, when available. */
  confidence: number | null;
  /** Quality flags; the field may be absent. */
  quality_flags?: string[];
  review_status: string;
  case_number: string | null;
  case_name: string | null;
  /** 'court_ruling' | 'appeals_committee' | '' */
  source_type: string | null;

  // ---- AI second-opinion (QA aid: independent, not ground truth, not auto-applied) ----

  ai_is_holding: boolean | null;
  ai_correct_type: string;
  ai_rationale: string;
  /** When the AI recommendation was generated; null if there is none. */
  ai_generated_at: string | null;
};
|
|
|
|
/**
 * Response of the score endpoint for one batch: batch-level counts plus a
 * metrics record for each validator, keyed by validator name.
 */
export type GoldsetScore = {
  /** Batch the score was computed for. */
  batch: string;
  /** Number of items in the batch. */
  total: number;
  /** Number of items that already carry a label. */
  labeled: number;
  /** Precision / recall / F1 and the confusion-matrix counts per validator. */
  validators: Record<
    string,
    {
      precision: number;
      recall: number;
      f1: number;
      tp: number;
      fp: number;
      fn: number;
      tn: number;
    }
  >;
};
|
|
|
|
/**
 * Tag fields sent when labelling a gold-set item. Every field is optional,
 * so a caller can send only the fields it wants to set.
 */
export type GoldsetTag = {
  /** Human judgment: is this a holding? */
  is_holding?: boolean | null;
  /** Human-assigned type. */
  correct_type?: string;
  /** Human judgment: is the supporting quote complete? */
  quote_complete?: boolean | null;
};
|
|
|
|
/**
 * Query-key factory for the gold-set queries. Every key starts with
 * "goldset"; list and score keys additionally carry the batch name.
 */
const keys = {
  all: ["goldset"] as const,
  /** Key of the item list for `batch`. */
  list(batch: string) {
    return ["goldset", "list", batch] as const;
  },
  /** Key of the validator score for `batch`. */
  score(batch: string) {
    return ["goldset", "score", batch] as const;
  },
};
|
|
|
|
/**
 * Query hook for the gold-set items of one batch.
 *
 * Requests `/api/goldset?batch=<batch>` (batch name URL-encoded) and forwards
 * the query's abort signal to `apiRequest`. Results are treated as fresh for
 * 5 seconds, and the query refetches on every mount.
 *
 * @param batch Batch name; defaults to "default".
 */
export function useGoldset(batch = "default") {
  const url = `/api/goldset?batch=${encodeURIComponent(batch)}`;

  return useQuery({
    queryKey: keys.list(batch),
    queryFn: (ctx) =>
      apiRequest<{ items: GoldsetItem[]; batch: string }>(url, {
        signal: ctx.signal,
      }),
    staleTime: 5_000,
    refetchOnMount: "always",
  });
}
|
|
|
|
/**
 * Query hook for the validator score of one batch.
 *
 * Requests `/api/goldset/score?batch=<batch>` (batch name URL-encoded) and
 * forwards the query's abort signal to `apiRequest`. Results are treated as
 * fresh for 5 seconds.
 *
 * @param batch Batch name; defaults to "default".
 */
export function useGoldsetScore(batch = "default") {
  const url = `/api/goldset/score?batch=${encodeURIComponent(batch)}`;

  return useQuery({
    queryKey: keys.score(batch),
    queryFn: (ctx) => apiRequest<GoldsetScore>(url, { signal: ctx.signal }),
    staleTime: 5_000,
  });
}
|
|
|
|
/**
 * Mutation hook that stores the human tag for one gold-set item.
 *
 * Sends a PATCH to `/api/goldset/<id>` (id URL-encoded) whose body is the
 * given tag fields plus `tagged_by: "chair"`. On success, the list and score
 * queries of `batch` are invalidated; the invalidations are not awaited.
 *
 * @param batch Batch whose cached queries are invalidated; defaults to "default".
 */
export function useTagGoldset(batch = "default") {
  const qc = useQueryClient();

  const patchTag = (vars: { id: string; tag: GoldsetTag }) => {
    const url = `/api/goldset/${encodeURIComponent(vars.id)}`;
    return apiRequest<{ ok: boolean }>(url, {
      method: "PATCH",
      body: { ...vars.tag, tagged_by: "chair" },
    });
  };

  return useMutation({
    mutationFn: patchTag,
    onSuccess: () => {
      // Fire-and-forget: both caches are marked stale, list first, then score.
      for (const queryKey of [keys.list(batch), keys.score(batch)]) {
        qc.invalidateQueries({ queryKey });
      }
    },
  });
}
|
|
|
|
/**
 * Mutation hook that asks the server to draw a gold-set sample.
 *
 * Sends a POST to `/api/goldset/sample` with body `{ n, batch }`, where `n`
 * is the mutation variable. The response reports the batch, how many rows
 * were inserted, and the resulting total.
 *
 * On success, both the list and the score query of `batch` are invalidated.
 * The score payload carries the batch `total`, so it goes stale whenever rows
 * are inserted, just as the list does. The combined promise is returned so
 * the mutation settles only after both invalidations have resolved.
 *
 * @param batch Batch to sample into; defaults to "default".
 */
export function useCreateGoldsetSample(batch = "default") {
  const qc = useQueryClient();

  return useMutation({
    mutationFn: (n: number) =>
      apiRequest<{ batch: string; inserted: number; total: number }>(
        "/api/goldset/sample",
        { method: "POST", body: { n, batch } },
      ),
    onSuccess: () =>
      Promise.all([
        qc.invalidateQueries({ queryKey: keys.list(batch) }),
        qc.invalidateQueries({ queryKey: keys.score(batch) }),
      ]),
  });
}
|