diff --git a/web-ui/src/app/approvals/page.tsx b/web-ui/src/app/approvals/page.tsx index 664d83e..be4dbc4 100644 --- a/web-ui/src/app/approvals/page.tsx +++ b/web-ui/src/app/approvals/page.tsx @@ -14,7 +14,7 @@ import { * מרכז אישורים — דפנה (INV-G10). * * עמוד אחד שמרכז את כל השערים האנושיים הממתינים להכרעת היו"ר: אישור הלכות, - * פסיקה חסרה, הערות שטרם יושמו, תיקים שנכשלו ב-QA, וסקירת gold-set. המטרה: + * פסיקה חסרה, הערות שטרם יושמו, ותיקים שנכשלו ב-QA. המטרה: * שאף פריט הדורש את אישורך לא יישכח. הנתונים נשלפים חי מ-/api/chair/pending. */ const SEVERITY_BADGE: Record = { diff --git a/web-ui/src/app/goldset/page.tsx b/web-ui/src/app/goldset/page.tsx deleted file mode 100644 index 223b83e..0000000 --- a/web-ui/src/app/goldset/page.tsx +++ /dev/null @@ -1,41 +0,0 @@ -"use client"; - -import Link from "next/link"; -import { AppShell } from "@/components/app-shell"; -import { GoldsetPanel } from "@/components/goldset/goldset-panel"; - -/** - * Gold-set tagging page (#81.7 / #81.8). - * - * Interactive review of a stratified halacha sample. The chair/Dafna labels each - * item (is_holding / correct_type / quote_complete); those human labels are the - * ground truth that measures the extraction validators and recalibrates the - * auto-approve threshold. Tags MUST be human — no AI pre-fill (circular bias). - */ -export default function GoldsetPage() { - return ( - -
-
- -

מדגם-זהב לתיוג איכות

-

- מדגם מרובד של הלכות שחולצו. לכל הלכה הכריעו שלוש שאלות — - האם זו הלכה אמיתית, מה הסוג הנכון, - והאם הציטוט שלם. ההכרעות שלכם הן אמת-המידה שמודדת את - דיוק המחלץ ומכיילת את סף-האישור האוטומטי. שיפוט משפטי אנושי בלבד — - לא תיוג-AI (כדי למנוע הטיה מעגלית). -

-
- -
- - -
-
- ); -} diff --git a/web-ui/src/components/app-shell.tsx b/web-ui/src/components/app-shell.tsx index dabf62f..97b2349 100644 --- a/web-ui/src/components/app-shell.tsx +++ b/web-ui/src/components/app-shell.tsx @@ -56,8 +56,7 @@ const KNOWLEDGE_MENUS: NavMenuDef[] = [ { href: "/precedents", label: "ספריית פסיקה" }, { href: "/digests", label: "יומונים" }, { href: "/missing-precedents", label: "פסיקה חסרה" }, - { href: "/graph", label: "מפת הקורפוס", groupLabel: "ניתוח וכיול" }, - { href: "/goldset", label: "מדגם-זהב" }, + { href: "/graph", label: "מפת הקורפוס" }, ], }, { diff --git a/web-ui/src/components/goldset/goldset-panel.tsx b/web-ui/src/components/goldset/goldset-panel.tsx deleted file mode 100644 index cf4bc89..0000000 --- a/web-ui/src/components/goldset/goldset-panel.tsx +++ /dev/null @@ -1,500 +0,0 @@ -"use client"; - -import { useEffect, useMemo, useState } from "react"; -import { Check, X, ChevronDown, ChevronLeft, Info, AlertTriangle } from "lucide-react"; -import { toast } from "sonner"; -import { Button } from "@/components/ui/button"; -import { Badge } from "@/components/ui/badge"; -import { Skeleton } from "@/components/ui/skeleton"; -import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; -import { - useGoldset, useGoldsetScore, useTagGoldset, useCreateGoldsetSample, - type GoldsetItem, -} from "@/lib/api/goldset"; -import { AuthorityBadge } from "@/components/precedents/halacha-meta"; - -// rule ROLE only (INV-DM7) — authority (binding/persuasive) is a SEPARATE, -// derived axis, shown read-only and never tagged here. -const TYPES: { value: string; label: string }[] = [ - { value: "holding", label: "מהותי" }, - { value: "interpretive", label: "פרשני" }, - { value: "procedural", label: "פרוצדורלי" }, - { value: "application", label: "יישום" }, - { value: "obiter", label: "אמרת-אגב" }, -]; - -// Consistency between is_holding and the role (#81.7): a real holding is -// holding/interpretive/procedural; a NON-holding is its reason — -// application (fact-bound) or obiter (not decided). Other pairings contradict. -const HOLDING_TYPES = new Set(["holding", "interpretive", "procedural"]); -const NON_HOLDING_TYPES = new Set(["application", "obiter"]); - -function inconsistentTag(it: GoldsetItem): string | null { - if (it.is_holding === null || !it.correct_type) return null; - if (it.is_holding === true && NON_HOLDING_TYPES.has(it.correct_type)) { - return "סימנת \"הלכה\" אך הסוג הוא יישום/אמרת-אגב — אלה דווקא הסיבות שמשהו אינו הלכה."; - } - if (it.is_holding === false && HOLDING_TYPES.has(it.correct_type)) { - return "סימנת \"לא הלכה\" אך הסוג מציין הלכה (מהותי/פרשני/…); ל\"לא\" מתאים יישום או אמרת-אגב."; - } - return null; -} - -const FLAG_LABELS: Record = { - non_decision: "אי-הכרעה", truncated_quote: "ציטוט קטוע", thin_restatement: "ניסוח דק", - quote_unverified: "ציטוט לא מאומת", nli_unsupported: "כלל לא נגזר", application: "יישום", - near_duplicate: "כפילות-קרובה", nevo_preamble_leak: "דליפת רציו", -}; - -function cleanCitation(s: string | null | undefined): string { - if (!s) return "—"; - return s.replace(/[‎‏‪-‮⁦-⁩]/g, "").trim(); -} - -// Source separation (פסקי-דין מול החלטות ועדת-ערר) for convenient tagging. -function sourceLabel(s: string | null): string { - return s === "court_ruling" ? "פסק-דין" - : s === "appeals_committee" ? "ועדת ערר" : "אחר"; -} -const SOURCE_FILTERS: { value: "all" | "court_ruling" | "appeals_committee"; label: string }[] = [ - { value: "all", label: "הכל" }, - { value: "court_ruling", label: "פסקי דין" }, - { value: "appeals_committee", label: "ועדת ערר" }, -]; - -function isTagged(it: GoldsetItem): boolean { - // Fully tagged only when ALL THREE answers are set — otherwise, in - // "hide tagged" mode, a card would vanish the moment is_holding is clicked, - // before correct_type / quote_complete can be set. - return it.is_holding !== null && it.quote_complete !== null && !!it.correct_type; -} - -// The AI second-opinion disagrees with the human tag (on is_holding or type). -function aiDisagrees(it: GoldsetItem): boolean { - if (!it.ai_generated_at) return false; - const holdDiff = it.is_holding !== null && it.ai_is_holding !== null - && it.is_holding !== it.ai_is_holding; - const typeDiff = !!it.correct_type && !!it.ai_correct_type - && it.correct_type !== it.ai_correct_type; - return holdDiff || typeDiff; -} - -// ─── Score panel ────────────────────────────────────────────────────────────── - -function ScorePanel({ batch }: { batch: string }) { - const { data } = useGoldsetScore(batch); - const [open, setOpen] = useState(true); - if (!data || data.labeled === 0) return null; - const rows = Object.entries(data.validators); - // negatives so far (truly "not a holding") = tp+fn of any validator. - const af = data.validators.any_flag; - const negatives = af ? af.tp + af.fn : 0; - return ( -
- - {open && ( -
-

- המדדים מודדים זיהוי "לא-הלכה" (יישום / ציטוט-קטוע / אי-הכרעה...). - {negatives < 10 - ? ` עד כה תויגו רק ${negatives} פריטי "לא הלכה" — המספרים יהפכו משמעותיים ככל שיצטברו עוד (במיוחד מבקט המסומנים).` - : " precision גבוה = מעט אזעקות-שווא; recall גבוה = תופס את רוב ה'לא-הלכה'."} -

- - - - - - - - - - - - {rows.map(([name, v]) => ( - - - - - - - - ))} - -
ולידטורPrecisionRecallF1tp/fp/fn/tn
{name}{v.precision.toFixed(2)}{v.recall.toFixed(2)}{v.f1.toFixed(2)} - {v.tp}/{v.fp}/{v.fn}/{v.tn} -
-
- )} -
- ); -} - -// ─── Rule-type help (info popover) ──────────────────────────────────────────── - -// Role only — "כמה מחייב" (מחייב/משכנע) is the SEPARATE authority axis, derived -// automatically from the court's identity and shown as a read-only badge. -const TYPE_HELP: { label: string; def: string; test: string; example: string }[] = [ - { - label: "מהותי", - def: "העיקרון המהותי שהיה הכרחי להכרעה — ה-ratio האמיתי. בר-הסתמכות מלא.", - test: "מבחן וומבו: הפוך את הכלל — אם התוצאה הייתה משתנה → מהותי.", - example: "נטל ההוכחה בהיטל השבחה מוטל על הוועדה המקומית.", - }, - { - label: "פרשני", - def: "קביעה שמפרשת הוראת-חוק / מונח / תכנית (מה המשמעות של סעיף X).", - test: "עונה ל'מה פירוש הנורמה?' ולא ל'מה הדין?'.", - example: "תכלית הפטור לפי ס' 19(ב)(4) היא לעודד פעילות ציבורית.", - }, - { - label: "פרוצדורלי", - def: "כלל סדר-דין: מועדים, סמכות, זכות-עמידה, מיצוי הליכים, נטל.", - test: "עוסק ב'איך' מתנהל ההליך, לא במהות התכנונית.", - example: "המועד להגשת ערר הוא 30 יום.", - }, - { - label: "יישום", - def: "החלת כלל על עובדות התיק הספציפי — תלוי-עובדות, לא בר-הכללה (לרוב 'לא הלכה').", - test: "מכיל 'במקרה דנן', שמות-צדדים, סכומים, המבנה הקונקרטי.", - example: "במקרה דנן ההיתר בטל כי השומה שגתה ב-12,000 ₪.", - }, - { - label: "אמרת-אגב", - def: "נאמר אגב אורחא, לא הכרחי להכרעה; הערכאה לא הכריעה בו.", - test: "מבחן וומבו הפוך: היפוך הכלל לא משנה את התוצאה. דגלים: 'למעלה מן הצורך', 'מבלי לקבוע מסמרות'.", - example: "אף שאיננו נדרשים להכריע, נעיר כי ייתכן ש...", - }, -]; - -function RuleTypeHelp() { - return ( - - - - - -
-

סוגי ההלכה — במה הם נבדלים

-

- כלל-אצבע: סימנת “הלכה” → לרוב מהותי / פרשני / פרוצדורלי. סימנת “לא” → לרוב יישום / אמרת-אגב. -

-
-
    - {TYPE_HELP.map((t) => ( -
  • -
    {t.label}
    -
    {t.def}
    -
    מבחן: {t.test}
    -
    דוגמה: {t.example}
    -
  • - ))} -
-
-
- ); -} - -// ─── Tag card ───────────────────────────────────────────────────────────────── - -function TagCard({ - it, focused, onTag, -}: { - it: GoldsetItem; - focused: boolean; - onTag: (tag: { is_holding?: boolean; correct_type?: string; quote_complete?: boolean }) => void; -}) { - const tagged = isTagged(it); - return ( -
-
- {cleanCitation(it.case_number)} - - {sourceLabel(it.source_type)} - - מכונה: {it.rule_type} - - {it.confidence != null && ( - ביטחון {it.confidence.toFixed(2)} - )} - {(it.quality_flags ?? []).map((f) => ( - - {FLAG_LABELS[f] ?? f} - - ))} - {tagged && ( - - תויג - - )} -
- -

{it.rule_statement}

-
- “{it.supporting_quote}” -
- - {it.ai_generated_at && (() => { - const aiType = TYPES.find((t) => t.value === it.ai_correct_type)?.label ?? it.ai_correct_type; - const holdDisagree = it.is_holding !== null && it.ai_is_holding !== null - && it.is_holding !== it.ai_is_holding; - const typeDisagree = !!it.correct_type && !!it.ai_correct_type - && it.correct_type !== it.ai_correct_type; - const anyTag = it.is_holding !== null || !!it.correct_type; - return ( -
-
- 🤖 המלצת AI: - {it.ai_is_holding ? "הלכה" : "לא הלכה"} - {aiType && · {aiType}} - {anyTag && ( - - {holdDisagree ? "⚠ חולק על 'הלכה/לא'" - : typeDisagree ? "⚠ חולק על הסוג" - : "✓ מסכים איתך"} - - )} -
- {it.ai_rationale &&
{it.ai_rationale}
} -
- ); - })()} - -
- {/* is_holding */} -
-
האם זו הלכה אמיתית?
-
- - -
-
- {/* correct_type */} -
-
- הסוג הנכון - -
-
- {TYPES.map((t) => ( - - ))} -
- {inconsistentTag(it) && ( -

- - {inconsistentTag(it)} -

- )} -
- {/* quote_complete */} -
-
הציטוט שלם?
-
- - -
-
-
-
- ); -} - -// ─── Main panel ─────────────────────────────────────────────────────────────── - -export function GoldsetPanel() { - const batch = "default"; - const { data, isPending, error } = useGoldset(batch); - const tag = useTagGoldset(batch); - const createSample = useCreateGoldsetSample(batch); - const [focusedId, setFocusedId] = useState(null); - // Single mutually-exclusive view mode — can't get "stuck" like the old - // independent toggles (where the disagree filter hid the untagged items). - const [viewMode, setViewMode] = - useState<"all" | "untagged" | "tagged" | "disagree">("all"); - const [sourceFilter, setSourceFilter] = - useState<"all" | "court_ruling" | "appeals_committee">("all"); - - const items = useMemo(() => data?.items ?? [], [data]); - const taggedCount = items.filter(isTagged).length; - const untaggedCount = items.length - taggedCount; - const disagreeCount = items.filter(aiDisagrees).length; - const sourceCounts = useMemo(() => ({ - court_ruling: items.filter((i) => i.source_type === "court_ruling").length, - appeals_committee: items.filter((i) => i.source_type === "appeals_committee").length, - }), [items]); - const visible = useMemo(() => { - let v = items; - if (sourceFilter !== "all") v = v.filter((i) => i.source_type === sourceFilter); - if (viewMode === "untagged") v = v.filter((i) => !isTagged(i)); - else if (viewMode === "tagged") v = v.filter(isTagged); - else if (viewMode === "disagree") v = v.filter(aiDisagrees); - // group-sort: כל פסקי-הדין יחד, ואז כל החלטות ועדת-הערר (הפרדה ברורה). - const order = (s: string | null) => - s === "court_ruling" ? 0 : s === "appeals_committee" ? 1 : 2; - return [...v].sort((a, b) => order(a.source_type) - order(b.source_type)); - }, [items, viewMode, sourceFilter]); - - const focused = focusedId ? visible.find((i) => i.id === focusedId) ?? null : null; - - useEffect(() => { - if (focusedId && visible.some((i) => i.id === focusedId)) return; - setFocusedId(visible[0]?.id ?? null); - }, [focusedId, visible]); - - useEffect(() => { - if (!focusedId) return; - document.querySelector(`[data-goldset-id="${focusedId}"]`) - ?.scrollIntoView({ block: "nearest", behavior: "smooth" }); - }, [focusedId]); - - const move = (delta: 1 | -1) => { - if (!visible.length) return; - const idx = focusedId ? visible.findIndex((i) => i.id === focusedId) : -1; - const next = idx < 0 ? (delta > 0 ? 0 : visible.length - 1) - : Math.max(0, Math.min(visible.length - 1, idx + delta)); - setFocusedId(visible[next].id); - }; - - const doTag = async ( - it: GoldsetItem, - t: { is_holding?: boolean; correct_type?: string; quote_complete?: boolean }, - ) => { - try { - await tag.mutateAsync({ id: it.id, tag: t }); - } catch (e) { - toast.error(e instanceof Error ? e.message : "שגיאה"); - } - }; - - useEffect(() => { - const onKey = (e: KeyboardEvent) => { - const tagName = (e.target as HTMLElement)?.tagName?.toLowerCase(); - if (tagName === "input" || tagName === "textarea" || tagName === "select") return; - if (e.key === "j") { e.preventDefault(); move(1); } - else if (e.key === "k") { e.preventDefault(); move(-1); } - else if (focused && (e.key === "h" || e.key === "H")) { e.preventDefault(); doTag(focused, { is_holding: true }); } - else if (focused && (e.key === "n" || e.key === "N")) { e.preventDefault(); doTag(focused, { is_holding: false }); } - else if (focused && (e.key === "c" || e.key === "C")) { e.preventDefault(); doTag(focused, { quote_complete: true }); } - else if (focused && (e.key === "x" || e.key === "X")) { e.preventDefault(); doTag(focused, { quote_complete: false }); } - }; - window.addEventListener("keydown", onKey); - return () => window.removeEventListener("keydown", onKey); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [focused, visible]); - - if (error) { - return
{error.message}
; - } - if (isPending) { - return
{[...Array(3)].map((_, i) => )}
; - } - if (!items.length) { - return ( -
-

אין מדגם-זהב עדיין.

- -
- ); - } - - const pct = items.length ? Math.round((taggedCount / items.length) * 100) : 0; - - return ( -
- - - {/* source separation — פסקי-דין מול החלטות ועדת-ערר */} -
- {SOURCE_FILTERS.map((s) => ( - - ))} -
- -
- {taggedCount}/{items.length} תויגו -
-
-
- - מקלדת: J/K - {" "}· הלכה H / לא N - {" "}· ציטוט שלם C / קטוע X - -
- {([ - { v: "all", label: `הכל (${items.length})` }, - { v: "untagged", label: `לא תויגו (${untaggedCount})` }, - { v: "tagged", label: `תויגו (${taggedCount})` }, - { v: "disagree", label: `⚠ אי-הסכמות (${disagreeCount})` }, - ] as const).map((m) => ( - - ))} -
-
- -
- {visible.map((it) => ( - doTag(it, t)} /> - ))} -
-
- ); -} diff --git a/web-ui/src/lib/api/chair.ts b/web-ui/src/lib/api/chair.ts index ee10ccc..be3e343 100644 --- a/web-ui/src/lib/api/chair.ts +++ b/web-ui/src/lib/api/chair.ts @@ -3,8 +3,8 @@ import { apiRequest } from "./client"; /** * Chair approval center (INV-G10) — aggregates every pending human-gate item - * (halacha approvals, missing precedents, unapplied feedback, QA-failed cases, - * gold-set review) so nothing Dafna must approve is forgotten. + * (halacha approvals, missing precedents, unapplied feedback, QA-failed cases) + * so nothing Dafna must approve is forgotten. * * Hand-typed (not from the generated types.ts) because /api/chair/pending is a * new endpoint; switch to the generated type after the next `npm run api:types`. diff --git a/web-ui/src/lib/api/goldset.ts b/web-ui/src/lib/api/goldset.ts deleted file mode 100644 index a35baf8..0000000 --- a/web-ui/src/lib/api/goldset.ts +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Gold-set tagging API (#81.7 / #81.8). - * - * The chair/Dafna manually labels a stratified sample of halachot - * (is_holding / correct_type / quote_complete). Those human labels are the - * ground truth used to measure the extraction validators and recalibrate the - * auto-approve threshold. Endpoints under /api/goldset. - */ -import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; -import { apiRequest } from "./client"; - -export type GoldsetItem = { - id: string; - halacha_id: string; - // human tags (null until tagged) - is_holding: boolean | null; - correct_type: string; - quote_complete: boolean | null; - tagged_by: string; - tagged_at: string | null; - // halacha content + the machine's own labels - rule_statement: string; - supporting_quote: string; - reasoning_summary: string; - rule_type: string; - // authority over the committee — DERIVED from the source (INV-DM7), read-only. - authority?: "binding" | "persuasive" | null; - confidence: number | null; - quality_flags?: string[]; - review_status: string; - case_number: string | null; - case_name: string | null; - source_type: string | null; // 'court_ruling' | 'appeals_committee' | '' - // AI second-opinion (QA aid — independent, not ground truth, not auto-applied) - ai_is_holding: boolean | null; - ai_correct_type: string; - ai_rationale: string; - ai_generated_at: string | null; -}; - -export type GoldsetScore = { - batch: string; - total: number; - labeled: number; - validators: Record< - string, - { precision: number; recall: number; f1: number; tp: number; fp: number; fn: number; tn: number } - >; -}; - -export type GoldsetTag = { - is_holding?: boolean | null; - correct_type?: string; - quote_complete?: boolean | null; -}; - -const keys = { - all: ["goldset"] as const, - list: (batch: string) => ["goldset", "list", batch] as const, - score: (batch: string) => ["goldset", "score", batch] as const, -}; - -export function useGoldset(batch = "default") { - return useQuery({ - queryKey: keys.list(batch), - queryFn: ({ signal }) => - apiRequest<{ items: GoldsetItem[]; batch: string }>( - `/api/goldset?batch=${encodeURIComponent(batch)}`, - { signal }, - ), - staleTime: 5_000, - refetchOnMount: "always", - }); -} - -export function useGoldsetScore(batch = "default") { - return useQuery({ - queryKey: keys.score(batch), - queryFn: ({ signal }) => - apiRequest( - `/api/goldset/score?batch=${encodeURIComponent(batch)}`, - { signal }, - ), - staleTime: 5_000, - }); -} - -export function useTagGoldset(batch = "default") { - const qc = useQueryClient(); - return useMutation({ - mutationFn: ({ id, tag }: { id: string; tag: GoldsetTag }) => - apiRequest<{ ok: boolean }>(`/api/goldset/${encodeURIComponent(id)}`, { - method: "PATCH", - body: { ...tag, tagged_by: "chair" }, - }), - onSuccess: () => { - qc.invalidateQueries({ queryKey: keys.list(batch) }); - qc.invalidateQueries({ queryKey: keys.score(batch) }); - }, - }); -} - -export function useCreateGoldsetSample(batch = "default") { - const qc = useQueryClient(); - return useMutation({ - mutationFn: (n: number) => - apiRequest<{ batch: string; inserted: number; total: number }>( - "/api/goldset/sample", - { method: "POST", body: { n, batch } }, - ), - onSuccess: () => qc.invalidateQueries({ queryKey: keys.list(batch) }), - }); -}