Merge pull request 'feat(goldset): interactive gold-set tagging page (#81.7/#81.8)' (#101) from worktree-goldset-tagging-ui into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m25s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m25s
This commit was merged in pull request #101.
This commit is contained in:
@@ -1256,6 +1256,27 @@ CREATE INDEX IF NOT EXISTS idx_equiv_halacha_a ON equivalent_halachot(halacha_a)
|
|||||||
CREATE INDEX IF NOT EXISTS idx_equiv_halacha_b ON equivalent_halachot(halacha_b);
|
CREATE INDEX IF NOT EXISTS idx_equiv_halacha_b ON equivalent_halachot(halacha_b);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Schema v29: the halacha_goldset table plus its batch index. Every statement
# uses IF NOT EXISTS, so executing the script again is a no-op.
SCHEMA_V29_SQL = """
-- halacha_goldset (#81.7/#81.8): a human-tagged evaluation set. A stratified
-- sample of halachot the chair/Dafna labels (is_holding / correct_type /
-- quote_complete) so we can measure the extraction validators' precision/recall
-- and recalibrate the auto-approve threshold. The tags are the ground truth —
-- they MUST be human (no AI pre-fill) to avoid circular bias.
CREATE TABLE IF NOT EXISTS halacha_goldset (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    halacha_id      UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE,
    batch           TEXT NOT NULL DEFAULT 'default',
    is_holding      BOOLEAN,          -- NULL until tagged
    correct_type    TEXT DEFAULT '',  -- binding | interpretive | obiter | application | procedural | persuasive | ''
    quote_complete  BOOLEAN,          -- NULL until tagged
    tagged_by       TEXT DEFAULT '',
    tagged_at       TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT now(),
    UNIQUE (halacha_id, batch)
);
CREATE INDEX IF NOT EXISTS idx_goldset_batch ON halacha_goldset(batch);
"""
|
||||||
|
|
||||||
|
|
||||||
async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
|
async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
|
||||||
async with pool.acquire() as conn:
|
async with pool.acquire() as conn:
|
||||||
@@ -1288,7 +1309,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
|
|||||||
await conn.execute(SCHEMA_V26_SQL)
|
await conn.execute(SCHEMA_V26_SQL)
|
||||||
await conn.execute(SCHEMA_V27_SQL)
|
await conn.execute(SCHEMA_V27_SQL)
|
||||||
await conn.execute(SCHEMA_V28_SQL)
|
await conn.execute(SCHEMA_V28_SQL)
|
||||||
logger.info("Database schema initialized (v1-v28)")
|
await conn.execute(SCHEMA_V29_SQL)
|
||||||
|
logger.info("Database schema initialized (v1-v29)")
|
||||||
|
|
||||||
|
|
||||||
async def init_schema() -> None:
|
async def init_schema() -> None:
|
||||||
@@ -4270,6 +4292,132 @@ async def _annotate_equivalents(pool, out: list[dict]) -> None:
|
|||||||
d["equivalents"] = by_src.get(str(d["id"]), [])
|
d["equivalents"] = by_src.get(str(d["id"]), [])
|
||||||
|
|
||||||
|
|
||||||
|
# ── Gold-set evaluation (#81.7 / #81.8) ──────────────────────────────────────
|
||||||
|
|
||||||
|
async def goldset_create_sample(
    n: int = 150, batch: str = "default", reset: bool = False,
) -> dict:
    """Draw a stratified sample of halachot into a gold-set tagging batch.

    Halachot with a non-empty ``rule_statement`` are grouped by
    ``(case_law_id, rule_type)``; the sample is filled round-robin, taking one
    id from each non-empty group per pass, until ``n`` ids are collected or
    every group is exhausted.

    Args:
        n: Maximum number of halachot to sample. ``n <= 0`` samples nothing.
        batch: Name of the batch the rows are inserted into.
        reset: When true, every existing row of ``batch`` (including its tags)
            is deleted before sampling.

    Returns:
        ``{"batch": ..., "inserted": ..., "total": ...}`` where ``inserted`` is
        the number of rows actually added by this call and ``total`` is the
        row count of the batch afterwards.

    Re-running is safe: rows already present for ``(halacha_id, batch)`` are
    skipped via ``ON CONFLICT DO NOTHING``.
    """
    from collections import defaultdict

    pool = await get_pool()
    if reset:
        await pool.execute("DELETE FROM halacha_goldset WHERE batch = $1", batch)
    rows = await pool.fetch(
        "SELECT id, case_law_id, rule_type FROM halachot WHERE rule_statement <> ''"
    )

    # One stratum per (case, rule_type) pair.
    strata: dict = defaultdict(list)
    for r in rows:
        strata[(r["case_law_id"], r["rule_type"])].append(r["id"])

    # Round-robin in whole passes: each pass takes one id from every stratum
    # that still has any, then drops the strata that ran dry. The loop ends
    # exactly when the quota is met or nothing is left, so no iteration cap is
    # needed and a skewed distribution can no longer cut the sample short.
    sample: list = []
    pending = list(strata.values())
    while pending and len(sample) < n:
        for ids in pending:
            sample.append(ids.pop())
            if len(sample) >= n:
                break
        pending = [ids for ids in pending if ids]

    # NOTE(review): rows are inserted one statement at a time. Presumably this
    # gives each row its own created_at, which goldset_list orders by — confirm
    # before replacing this with a single bulk INSERT.
    inserted = 0
    for hid in sample:
        res = await pool.execute(
            "INSERT INTO halacha_goldset (halacha_id, batch) VALUES ($1, $2) "
            "ON CONFLICT (halacha_id, batch) DO NOTHING", hid, batch,
        )
        # The returned status ends in " 1" when the row was inserted and in
        # " 0" when the conflict clause skipped it.
        if res.endswith(" 1"):
            inserted += 1
    total = await pool.fetchval(
        "SELECT count(*) FROM halacha_goldset WHERE batch = $1", batch)
    return {"batch": batch, "inserted": inserted, "total": total}
|
||||||
|
|
||||||
|
|
||||||
|
async def goldset_list(batch: str = "default") -> list[dict]:
    """Return the rows of gold-set ``batch`` as plain dicts.

    Each dict combines the human tag columns of ``halacha_goldset``, the
    halacha text and machine labels from ``halachot``, and the case
    number/name from ``case_law`` (LEFT JOIN, so those two may be ``None``).
    Rows come back ordered by ``created_at`` and then ``id``. A non-null
    ``tagged_at`` is converted with ``isoformat()`` and a non-null
    ``confidence`` with ``float()``; all other values are passed through.
    """
    query = (
        "SELECT g.id, g.halacha_id::text AS halacha_id, g.is_holding, "
        "       g.correct_type, g.quote_complete, g.tagged_by, g.tagged_at, "
        "       h.rule_statement, h.supporting_quote, h.reasoning_summary, "
        "       h.rule_type, h.confidence, h.quality_flags, h.review_status, "
        "       cl.case_number, cl.case_name "
        "FROM halacha_goldset g JOIN halachot h ON h.id = g.halacha_id "
        "LEFT JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE g.batch = $1 ORDER BY g.created_at, g.id"
    )

    def _as_item(record) -> dict:
        # Copy the record into a dict and normalise the two converted fields.
        item = dict(record)
        tagged_at = item.get("tagged_at")
        if tagged_at is not None:
            item["tagged_at"] = tagged_at.isoformat()
        confidence = item.get("confidence")
        if confidence is not None:
            item["confidence"] = float(confidence)
        return item

    pool = await get_pool()
    records = await pool.fetch(query, batch)
    return [_as_item(record) for record in records]
|
||||||
|
|
||||||
|
|
||||||
|
async def goldset_tag(
    goldset_id: UUID, *, is_holding: bool | None = None,
    correct_type: str | None = None, quote_complete: bool | None = None,
    tagged_by: str = "chair",
) -> dict | None:
    """Apply a partial human tag to one gold-set row.

    Only the tag fields passed as non-``None`` are written, so a field cannot
    be reset to NULL through this function. ``tagged_by`` and ``tagged_at``
    are refreshed on every call, even when no tag field is given.

    Returns:
        The updated row as a dict, or ``None`` when no row has ``goldset_id``.
    """
    assignments = ["tagged_by = $2", "tagged_at = now()"]
    args: list = [goldset_id, tagged_by]

    optional_columns = (
        ("is_holding", is_holding),
        ("correct_type", correct_type),
        ("quote_complete", quote_complete),
    )
    for column, value in optional_columns:
        if value is None:
            continue
        args.append(value)
        # The placeholder index equals the value's 1-based position in args.
        assignments.append(f"{column} = ${len(args)}")

    pool = await get_pool()
    updated = await pool.fetchrow(
        f"UPDATE halacha_goldset SET {', '.join(assignments)} WHERE id = $1 RETURNING *", *args,
    )
    if not updated:
        return None
    return dict(updated)
|
||||||
|
|
||||||
|
|
||||||
|
async def goldset_score(batch: str = "default") -> dict:
    """Measure each extraction validator against the human tags (#81.8).

    A validator flag predicts "NOT a clean holding"; ground truth is
    ``is_holding == false``. ``truncated_quote`` is scored against the human
    ``quote_complete`` tag, and only on items where that tag has been set —
    an item without a ``quote_complete`` label has no ground truth for the
    quote and is left out of that validator's counts.

    Only items with a non-null ``is_holding`` are scored at all.

    Returns:
        ``{"batch", "total", "labeled", "validators"}``. ``validators`` maps
        each validator name (``any_flag``, ``application``, ``non_decision``,
        ``thin_restatement``, ``truncated_quote``) to its precision, recall
        and F1 (rounded to 3 places; 0.0 when the denominator is zero) plus
        the raw ``tp`` / ``fp`` / ``fn`` / ``tn`` counts.
    """
    items = await goldset_list(batch)
    labeled = [r for r in items if r.get("is_holding") is not None]

    validator_names = (
        "any_flag", "application", "non_decision", "thin_restatement", "truncated_quote",
    )
    # Seed every validator so each one is reported even with zero counts.
    counters: dict = {
        name: {"tp": 0, "fp": 0, "fn": 0, "tn": 0} for name in validator_names
    }

    def tally(name: str, predicted_bad: bool, truly_bad: bool) -> None:
        """Add one observation to ``name``'s confusion matrix."""
        if predicted_bad:
            key = "tp" if truly_bad else "fp"
        else:
            key = "fn" if truly_bad else "tn"
        counters[name][key] += 1

    for r in labeled:
        rule = r.get("rule_statement") or ""
        quote = r.get("supporting_quote") or ""
        rtype = r.get("rule_type") or "binding"
        human_qc = r["quote_complete"]
        qc = human_qc if human_qc is not None else True
        truly_bad = r["is_holding"] is False
        # NOTE(review): the fourth argument receives the human quote_complete
        # tag (True when untagged); confirm this matches what
        # compute_quality_flags expects in that position.
        flags = halacha_quality.compute_quality_flags(rule, quote, "", qc, rtype)
        tally("any_flag", bool(flags), truly_bad)
        tally("application", halacha_quality.FLAG_APPLICATION in flags, truly_bad)
        tally("non_decision", halacha_quality.FLAG_NON_DECISION in flags, truly_bad)
        tally("thin_restatement", halacha_quality.FLAG_THIN_RESTATEMENT in flags, truly_bad)
        if human_qc is not None:
            # Score the truncation detector only where a human judged the quote.
            tally("truncated_quote", halacha_quality.is_quote_truncated(quote), human_qc is False)

    def prf(c: dict) -> dict:
        """Derive precision / recall / F1 from one confusion matrix."""
        p = c["tp"] / (c["tp"] + c["fp"]) if (c["tp"] + c["fp"]) else 0.0
        rec = c["tp"] / (c["tp"] + c["fn"]) if (c["tp"] + c["fn"]) else 0.0
        f1 = 2 * p * rec / (p + rec) if (p + rec) else 0.0
        return {"precision": round(p, 3), "recall": round(rec, 3), "f1": round(f1, 3), **c}

    return {
        "batch": batch, "total": len(items), "labeled": len(labeled),
        "validators": {name: prf(c) for name, c in counters.items()},
    }
|
||||||
|
|
||||||
|
|
||||||
async def list_corroboration_for_halacha(halacha_id: UUID) -> list[dict]:
|
async def list_corroboration_for_halacha(halacha_id: UUID) -> list[dict]:
|
||||||
"""Return all corroboration rows for one halacha, ordered by match_score DESC."""
|
"""Return all corroboration rows for one halacha, ordered by match_score DESC."""
|
||||||
pool = await get_pool()
|
pool = await get_pool()
|
||||||
|
|||||||
41
web-ui/src/app/goldset/page.tsx
Normal file
41
web-ui/src/app/goldset/page.tsx
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import Link from "next/link";
|
||||||
|
import { AppShell } from "@/components/app-shell";
|
||||||
|
import { GoldsetPanel } from "@/components/goldset/goldset-panel";
|
||||||
|
|
||||||
|
/**
 * Gold-set tagging page (#81.7 / #81.8).
 *
 * Renders the page chrome (breadcrumb, title, intro text, divider) around
 * <GoldsetPanel />, which holds the interactive review of a stratified halacha
 * sample. The chair/Dafna labels each item (is_holding / correct_type /
 * quote_complete); those human labels are the ground truth that measures the
 * extraction validators and recalibrates the auto-approve threshold.
 * Tags MUST be human — no AI pre-fill (circular bias).
 */

/** Breadcrumb trail: home link, separator, current page. */
function GoldsetBreadcrumb() {
  return (
    <nav className="text-[0.78rem] text-ink-muted mb-1">
      <Link href="/" className="hover:text-gold-deep">בית</Link>
      <span aria-hidden> · </span>
      <span className="text-navy">מדגם-זהב לתיוג</span>
    </nav>
  );
}

export default function GoldsetPage() {
  const divider = (
    <div className="h-[2px] bg-gradient-to-l from-transparent via-gold to-transparent" />
  );

  return (
    <AppShell>
      <section className="space-y-6">
        <header>
          <GoldsetBreadcrumb />
          <h1 className="text-navy mb-0">מדגם-זהב לתיוג איכות</h1>
          <p className="text-ink-muted text-sm mt-1 max-w-3xl">
            מדגם מרובד של הלכות שחולצו. לכל הלכה הכריעו שלוש שאלות —
            <strong> האם זו הלכה אמיתית</strong>, <strong>מה הסוג הנכון</strong>,
            ו<strong>האם הציטוט שלם</strong>. ההכרעות שלכם הן אמת-המידה שמודדת את
            דיוק המחלץ ומכיילת את סף-האישור האוטומטי. שיפוט משפטי אנושי בלבד —
            לא תיוג-AI (כדי למנוע הטיה מעגלית).
          </p>
        </header>

        {divider}

        <GoldsetPanel />
      </section>
    </AppShell>
  );
}
|
||||||
@@ -51,6 +51,7 @@ const NAV_GROUPS: NavGroup[] = [
|
|||||||
items: [
|
items: [
|
||||||
{ href: "/precedents", label: "ספריית פסיקה" },
|
{ href: "/precedents", label: "ספריית פסיקה" },
|
||||||
{ href: "/missing-precedents", label: "פסיקה חסרה" },
|
{ href: "/missing-precedents", label: "פסיקה חסרה" },
|
||||||
|
{ href: "/goldset", label: "מדגם-זהב" },
|
||||||
{ href: "/training", label: "אימון סגנון" },
|
{ href: "/training", label: "אימון סגנון" },
|
||||||
{ href: "/methodology", label: "מתודולוגיה" },
|
{ href: "/methodology", label: "מתודולוגיה" },
|
||||||
],
|
],
|
||||||
|
|||||||
283
web-ui/src/components/goldset/goldset-panel.tsx
Normal file
283
web-ui/src/components/goldset/goldset-panel.tsx
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useEffect, useMemo, useState } from "react";
|
||||||
|
import { Check, X, ChevronDown, ChevronLeft } from "lucide-react";
|
||||||
|
import { toast } from "sonner";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { Badge } from "@/components/ui/badge";
|
||||||
|
import { Skeleton } from "@/components/ui/skeleton";
|
||||||
|
import {
|
||||||
|
useGoldset, useGoldsetScore, useTagGoldset, useCreateGoldsetSample,
|
||||||
|
type GoldsetItem,
|
||||||
|
} from "@/lib/api/goldset";
|
||||||
|
|
||||||
|
/** One selectable value for the human `correct_type` tag. */
type TypeOption = { value: string; label: string };

// Options offered for `correct_type`, in display order. `value` is what gets
// sent to the API; `label` is the Hebrew button caption.
const TYPES: TypeOption[] = [
  { value: "binding", label: "מחייבת" },
  { value: "interpretive", label: "פרשני" },
  { value: "application", label: "יישום" },
  { value: "obiter", label: "אמרת-אגב" },
  { value: "procedural", label: "פרוצדורלי" },
  { value: "persuasive", label: "משכנע" },
];

// Hebrew captions for machine quality flags. A flag missing from this map is
// rendered with its raw key.
const FLAG_LABELS: Record<string, string> = {
  non_decision: "אי-הכרעה",
  truncated_quote: "ציטוט קטוע",
  thin_restatement: "ניסוח דק",
  quote_unverified: "ציטוט לא מאומת",
  nli_unsupported: "כלל לא נגזר",
  application: "יישום",
  near_duplicate: "כפילות-קרובה",
  nevo_preamble_leak: "דליפת רציו",
};
|
||||||
|
|
||||||
|
// Invisible formatting characters removed from citations: zero-width
// characters and LRM/RLM (U+200B–U+200F), bidi embeddings/overrides
// (U+202A–U+202E) and bidi isolates (U+2066–U+2069).
// NOTE(review): the previous pattern displayed as `[--]`, i.e. either literal
// invisible characters or a class that strips hyphens. The ranges are written
// as explicit escapes so the intent survives editors and copy/paste; confirm
// this set matches what the original literal contained.
const INVISIBLE_FORMAT_CHARS = /[\u200B-\u200F\u202A-\u202E\u2066-\u2069]/g;

/**
 * Normalise a case citation for display.
 *
 * Returns an em dash placeholder for null/undefined/empty input; otherwise
 * removes invisible formatting characters and trims surrounding whitespace.
 * Visible characters, including hyphens in case numbers, are kept.
 */
function cleanCitation(s: string | null | undefined): string {
  if (!s) return "—";
  return s.replace(INVISIBLE_FORMAT_CHARS, "").trim();
}
|
||||||
|
|
||||||
|
/**
 * An item counts as tagged once a human has answered the is_holding question;
 * the other two tags do not affect this.
 */
function isTagged(it: GoldsetItem): boolean {
  const answered = it.is_holding !== null;
  return answered;
}
|
||||||
|
|
||||||
|
// ─── Score panel ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Collapsible table of validator scores for one batch.
 *
 * Renders nothing until the score query has data with at least one labeled
 * item. Collapsed by default; the header button toggles the table.
 */
function ScorePanel({ batch }: { batch: string }) {
  const { data } = useGoldsetScore(batch);
  const [open, setOpen] = useState(false);

  if (!data || data.labeled === 0) return null;

  const rows = Object.entries(data.validators);
  const Chevron = open ? ChevronDown : ChevronLeft;
  const twoDecimals = (x: number) => x.toFixed(2);
  const toggle = () => setOpen((v) => !v);

  return (
    <div className="rounded-lg border border-rule bg-surface">
      <button
        type="button"
        onClick={toggle}
        className="w-full flex items-center gap-2 px-4 py-2.5 text-sm hover:bg-gold-wash/30"
        aria-expanded={open}
      >
        <Chevron className="w-4 h-4" />
        <span className="font-semibold text-navy">ציון הוולידטורים מול התיוג</span>
        <span className="text-ink-muted">({data.labeled} מתויגות)</span>
      </button>
      {open && (
        <div className="px-4 pb-3 overflow-x-auto">
          <table className="w-full text-sm tabular-nums">
            <thead>
              <tr className="text-ink-muted text-[0.72rem] border-b border-rule">
                <th className="text-start py-1 ps-1">ולידטור</th>
                <th className="text-start">Precision</th>
                <th className="text-start">Recall</th>
                <th className="text-start">F1</th>
                <th className="text-start">tp/fp/fn/tn</th>
              </tr>
            </thead>
            <tbody>
              {rows.map(([name, v]) => (
                <tr key={name} className="border-b border-rule-soft last:border-0">
                  <td className="py-1 ps-1 text-navy">{name}</td>
                  <td>{twoDecimals(v.precision)}</td>
                  <td>{twoDecimals(v.recall)}</td>
                  <td className="font-semibold">{twoDecimals(v.f1)}</td>
                  <td className="text-ink-muted text-[0.72rem]">
                    {v.tp}/{v.fp}/{v.fn}/{v.tn}
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      )}
    </div>
  );
}
|
||||||
|
|
||||||
|
// ─── Tag card ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * A question with a yes/no button pair. The button matching the current
 * answer is highlighted (gold for yes, danger for no); `value === null` means
 * unanswered and highlights neither.
 */
function BoolChoice({
  question, value, yesLabel, noLabel, onPick,
}: {
  question: string;
  value: boolean | null;
  yesLabel: string;
  noLabel: string;
  onPick: (answer: boolean) => void;
}) {
  const isYes = value === true;
  const isNo = value === false;
  return (
    <div>
      <div className="text-[0.7rem] text-ink-muted mb-1">{question}</div>
      <div className="flex gap-1">
        <Button size="sm" variant={isYes ? "default" : "ghost"}
          className={isYes ? "bg-gold text-navy hover:bg-gold-deep" : ""}
          onClick={() => onPick(true)}>{yesLabel}</Button>
        <Button size="sm" variant={isNo ? "default" : "ghost"}
          className={isNo ? "bg-danger text-parchment hover:bg-danger" : "text-danger"}
          onClick={() => onPick(false)}>{noLabel}</Button>
      </div>
    </div>
  );
}

/**
 * One gold-set item: machine metadata (case number, machine rule type,
 * confidence, quality flags), the rule statement and its supporting quote,
 * and the three human tagging controls. Every button reports a partial tag
 * through `onTag`. A focused card gets a gold ring; a tagged, unfocused card
 * is dimmed.
 */
function TagCard({
  it, focused, onTag,
}: {
  it: GoldsetItem;
  focused: boolean;
  onTag: (tag: { is_holding?: boolean; correct_type?: string; quote_complete?: boolean }) => void;
}) {
  const tagged = isTagged(it);

  // Focus styling wins over the dimmed "already tagged" look.
  let stateClass = "border-rule";
  if (focused) stateClass = "border-gold ring-2 ring-gold/40 shadow-md";
  else if (tagged) stateClass = "border-rule-soft opacity-70";

  const flags = it.quality_flags ?? [];

  return (
    <div
      data-goldset-id={it.id}
      className={`rounded-lg border bg-surface p-4 space-y-3 transition-colors ${stateClass}`}
    >
      <div className="flex items-center gap-2 text-[0.72rem] text-ink-muted flex-wrap">
        <span className="font-semibold text-navy">{cleanCitation(it.case_number)}</span>
        <Badge variant="outline" className="text-[0.65rem]">מכונה: {it.rule_type}</Badge>
        {it.confidence != null && (
          <Badge variant="outline" className="text-[0.65rem] tabular-nums">ביטחון {it.confidence.toFixed(2)}</Badge>
        )}
        {flags.map((f) => (
          <Badge key={f} variant="outline" className="text-[0.65rem] bg-danger-bg text-danger border-danger/40">
            {FLAG_LABELS[f] ?? f}
          </Badge>
        ))}
        {tagged && (
          <Badge variant="outline" className="text-[0.65rem] bg-gold-wash text-gold-deep border-gold/40 ms-auto">
            <Check className="w-3 h-3 me-1" /> תויג
          </Badge>
        )}
      </div>

      <p className="text-navy font-medium leading-relaxed" dir="rtl">{it.rule_statement}</p>
      <blockquote className="text-ink-soft text-sm leading-relaxed border-r-2 border-gold pr-3" dir="rtl">
        “{it.supporting_quote}”
      </blockquote>

      <div className="grid gap-3 sm:grid-cols-3 pt-1 border-t border-rule-soft">
        {/* is_holding */}
        <BoolChoice
          question="האם זו הלכה אמיתית?"
          value={it.is_holding}
          yesLabel="הלכה (H)"
          noLabel="לא (N)"
          onPick={(answer) => onTag({ is_holding: answer })}
        />
        {/* correct_type */}
        <div>
          <div className="text-[0.7rem] text-ink-muted mb-1">הסוג הנכון</div>
          <div className="flex gap-1 flex-wrap">
            {TYPES.map((t) => {
              const selected = it.correct_type === t.value;
              return (
                <Button key={t.value} size="sm"
                  variant={selected ? "default" : "ghost"}
                  className={`text-[0.7rem] px-2 ${selected ? "bg-navy text-parchment hover:bg-navy-soft" : ""}`}
                  onClick={() => onTag({ correct_type: t.value })}>{t.label}</Button>
              );
            })}
          </div>
        </div>
        {/* quote_complete */}
        <BoolChoice
          question="הציטוט שלם?"
          value={it.quote_complete}
          yesLabel="שלם (C)"
          noLabel="קטוע (X)"
          onPick={(answer) => onTag({ quote_complete: answer })}
        />
      </div>
    </div>
  );
}
|
||||||
|
|
||||||
|
// ─── Main panel ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Main gold-set tagging panel for the "default" batch.
 *
 * Loads the batch, shows progress and the validator score panel, renders one
 * TagCard per visible item, and wires keyboard shortcuts:
 * J/K move the focus, H/N answer is_holding, C/X answer quote_complete.
 *
 * Shortcuts are ignored while a Ctrl/Cmd/Alt modifier is held (so browser
 * shortcuts such as copy keep working) and while typing in a form field or
 * contentEditable element. Letters are matched case-insensitively; when the
 * active layout does not produce a Latin letter (e.g. a Hebrew layout) the
 * physical key position is used instead.
 */
export function GoldsetPanel() {
  const batch = "default";
  const { data, isPending, error } = useGoldset(batch);
  const tag = useTagGoldset(batch);
  const createSample = useCreateGoldsetSample(batch);
  const [focusedId, setFocusedId] = useState<string | null>(null);
  const [hideTagged, setHideTagged] = useState(false);

  const items = useMemo(() => data?.items ?? [], [data]);
  const taggedCount = items.filter(isTagged).length;
  const visible = useMemo(
    () => (hideTagged ? items.filter((i) => !isTagged(i)) : items),
    [items, hideTagged],
  );

  const focused = focusedId ? visible.find((i) => i.id === focusedId) ?? null : null;

  // Keep the focus on a visible item: fall back to the first visible one (or
  // none) when the focused item disappears, e.g. after "hide tagged".
  useEffect(() => {
    if (focusedId && visible.some((i) => i.id === focusedId)) return;
    setFocusedId(visible[0]?.id ?? null);
  }, [focusedId, visible]);

  // Scroll the focused card into view.
  useEffect(() => {
    if (!focusedId) return;
    document.querySelector(`[data-goldset-id="${focusedId}"]`)
      ?.scrollIntoView({ block: "nearest", behavior: "smooth" });
  }, [focusedId]);

  // Move the focus one item forward/backward, clamped to the list bounds.
  const move = (delta: 1 | -1) => {
    if (!visible.length) return;
    const idx = focusedId ? visible.findIndex((i) => i.id === focusedId) : -1;
    const next = idx < 0 ? (delta > 0 ? 0 : visible.length - 1)
      : Math.max(0, Math.min(visible.length - 1, idx + delta));
    setFocusedId(visible[next].id);
  };

  // Save a partial tag; failures surface as a toast instead of throwing.
  const doTag = async (
    it: GoldsetItem,
    t: { is_holding?: boolean; correct_type?: string; quote_complete?: boolean },
  ) => {
    try {
      await tag.mutateAsync({ id: it.id, tag: t });
    } catch (e) {
      toast.error(e instanceof Error ? e.message : "שגיאה");
    }
  };

  useEffect(() => {
    const onKey = (e: KeyboardEvent) => {
      // Leave browser/OS shortcuts (Ctrl+C, Cmd+N, Alt+…) untouched.
      if (e.ctrlKey || e.metaKey || e.altKey) return;

      const target = e.target as HTMLElement | null;
      const tagName = target?.tagName?.toLowerCase();
      if (tagName === "input" || tagName === "textarea" || tagName === "select") return;
      if (target?.isContentEditable) return;

      // Prefer the typed character; fall back to the physical key ("KeyJ" →
      // "j") when the layout yields a non-Latin character.
      const key = /^[a-z]$/i.test(e.key)
        ? e.key.toLowerCase()
        : (e.code ?? "").replace(/^Key/, "").toLowerCase();

      // Tagging shortcuts only act (and only swallow the key) with a focused item.
      const tagFocused = (t: { is_holding?: boolean; quote_complete?: boolean }) => {
        if (!focused) return;
        e.preventDefault();
        void doTag(focused, t);
      };

      switch (key) {
        case "j": e.preventDefault(); move(1); break;
        case "k": e.preventDefault(); move(-1); break;
        case "h": tagFocused({ is_holding: true }); break;
        case "n": tagFocused({ is_holding: false }); break;
        case "c": tagFocused({ quote_complete: true }); break;
        case "x": tagFocused({ quote_complete: false }); break;
        default: break;
      }
    };
    window.addEventListener("keydown", onKey);
    return () => window.removeEventListener("keydown", onKey);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [focused, visible]);

  if (error) {
    return <div className="rounded bg-danger-bg border border-danger/40 px-6 py-5 text-danger text-center">{error.message}</div>;
  }
  if (isPending) {
    return <div className="space-y-3">{[...Array(3)].map((_, i) => <Skeleton key={i} className="h-40 w-full" />)}</div>;
  }
  if (!items.length) {
    return (
      <div className="text-center text-ink-muted py-16 space-y-4">
        <p className="text-lg">אין מדגם-זהב עדיין.</p>
        <Button disabled={createSample.isPending}
          onClick={() => createSample.mutate(150)}
          className="bg-gold text-navy hover:bg-gold-deep">
          צור מדגם של 150 הלכות לתיוג
        </Button>
      </div>
    );
  }

  const pct = items.length ? Math.round((taggedCount / items.length) * 100) : 0;

  return (
    <div className="space-y-4">
      <ScorePanel batch={batch} />

      <div className="flex items-center gap-3 flex-wrap text-sm">
        <span className="text-navy font-semibold tabular-nums">{taggedCount}/{items.length} תויגו</span>
        <div className="h-2 w-40 rounded-full bg-rule-soft overflow-hidden">
          <div className="h-full bg-gold" style={{ width: `${pct}%` }} />
        </div>
        <span className="text-ink-muted text-[0.72rem]">
          מקלדת: <kbd className="bg-rule-soft px-1.5 rounded">J</kbd>/<kbd className="bg-rule-soft px-1.5 rounded">K</kbd>
          {" "}· הלכה <kbd className="bg-rule-soft px-1.5 rounded">H</kbd> / לא <kbd className="bg-rule-soft px-1.5 rounded">N</kbd>
          {" "}· ציטוט שלם <kbd className="bg-rule-soft px-1.5 rounded">C</kbd> / קטוע <kbd className="bg-rule-soft px-1.5 rounded">X</kbd>
        </span>
        <Button size="sm" variant="ghost" className="ms-auto" onClick={() => setHideTagged((v) => !v)}>
          {hideTagged ? "הצג הכל" : "הסתר מתויגים"}
        </Button>
      </div>

      <div className="space-y-3">
        {visible.map((it) => (
          <TagCard key={it.id} it={it} focused={it.id === focusedId}
            onTag={(t) => doTag(it, t)} />
        ))}
      </div>
    </div>
  );
}
|
||||||
105
web-ui/src/lib/api/goldset.ts
Normal file
105
web-ui/src/lib/api/goldset.ts
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
/**
|
||||||
|
* Gold-set tagging API (#81.7 / #81.8).
|
||||||
|
*
|
||||||
|
* The chair/Dafna manually labels a stratified sample of halachot
|
||||||
|
* (is_holding / correct_type / quote_complete). Those human labels are the
|
||||||
|
* ground truth used to measure the extraction validators and recalibrate the
|
||||||
|
* auto-approve threshold. Endpoints under /api/goldset.
|
||||||
|
*/
|
||||||
|
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
|
||||||
|
import { apiRequest } from "./client";
|
||||||
|
|
||||||
|
/** One row of the tagging queue, as returned by GET /api/goldset. */
export type GoldsetItem = {
  id: string;
  halacha_id: string;

  // ── Human tags (null until tagged) ──
  is_holding: boolean | null;
  correct_type: string;
  quote_complete: boolean | null;
  tagged_by: string;
  tagged_at: string | null;

  // ── Halacha content + the machine's own labels ──
  rule_statement: string;
  supporting_quote: string;
  reasoning_summary: string;
  rule_type: string;
  confidence: number | null;
  quality_flags?: string[];
  review_status: string;
  case_number: string | null;
  case_name: string | null;
};

/** Result of GET /api/goldset/score: one metrics entry per validator name. */
export type GoldsetScore = {
  batch: string;
  total: number;
  labeled: number;
  validators: Record<
    string,
    {
      precision: number;
      recall: number;
      f1: number;
      tp: number;
      fp: number;
      fn: number;
      tn: number;
    }
  >;
};

/** Partial tag sent with PATCH /api/goldset/{id}; omitted fields are not sent. */
export type GoldsetTag = {
  is_holding?: boolean | null;
  correct_type?: string;
  quote_complete?: boolean | null;
};
|
||||||
|
|
||||||
|
// Query-key factory. Every key starts with "goldset", so `keys.all` is a
// prefix of both the list and the score keys.
const GOLDSET_ROOT = "goldset";

const keys = {
  all: [GOLDSET_ROOT] as const,
  list(batch: string) {
    return [GOLDSET_ROOT, "list", batch] as const;
  },
  score(batch: string) {
    return [GOLDSET_ROOT, "score", batch] as const;
  },
};
|
||||||
|
|
||||||
|
/**
 * Query for the tagging queue of `batch` (GET /api/goldset).
 * Data is considered fresh for 5 seconds, and the query always refetches
 * when a component using it mounts.
 */
export function useGoldset(batch = "default") {
  type Response = { items: GoldsetItem[]; batch: string };
  const url = `/api/goldset?batch=${encodeURIComponent(batch)}`;

  return useQuery({
    queryKey: keys.list(batch),
    queryFn: ({ signal }) => apiRequest<Response>(url, { signal }),
    staleTime: 5_000,
    refetchOnMount: "always",
  });
}
|
||||||
|
|
||||||
|
/**
 * Query for the validator scores of `batch` (GET /api/goldset/score).
 * Data is considered fresh for 5 seconds.
 */
export function useGoldsetScore(batch = "default") {
  const url = `/api/goldset/score?batch=${encodeURIComponent(batch)}`;

  return useQuery({
    queryKey: keys.score(batch),
    queryFn: ({ signal }) => apiRequest<GoldsetScore>(url, { signal }),
    staleTime: 5_000,
  });
}
|
||||||
|
|
||||||
|
/**
 * Mutation that saves one partial tag (PATCH /api/goldset/{id}).
 * The request always carries `tagged_by: "chair"`. On success the list and
 * score queries of `batch` are invalidated.
 */
export function useTagGoldset(batch = "default") {
  const qc = useQueryClient();

  const saveTag = ({ id, tag }: { id: string; tag: GoldsetTag }) => {
    const path = `/api/goldset/${encodeURIComponent(id)}`;
    return apiRequest<{ ok: boolean }>(path, {
      method: "PATCH",
      body: { ...tag, tagged_by: "chair" },
    });
  };

  return useMutation({
    mutationFn: saveTag,
    onSuccess: () => {
      for (const queryKey of [keys.list(batch), keys.score(batch)]) {
        qc.invalidateQueries({ queryKey });
      }
    },
  });
}
|
||||||
|
|
||||||
|
/**
 * Mutation that creates or extends the sample for `batch`
 * (POST /api/goldset/sample). The mutation variable is the sample size `n`.
 *
 * On success every gold-set query is invalidated through the shared
 * `keys.all` prefix. New rows change the score endpoint's `total` as well as
 * the list, so both have to refetch.
 */
export function useCreateGoldsetSample(batch = "default") {
  const qc = useQueryClient();
  return useMutation({
    mutationFn: (n: number) =>
      apiRequest<{ batch: string; inserted: number; total: number }>(
        "/api/goldset/sample",
        { method: "POST", body: { n, batch } },
      ),
    onSuccess: () => qc.invalidateQueries({ queryKey: keys.all }),
  });
}
|
||||||
53
web/app.py
53
web/app.py
@@ -6099,6 +6099,59 @@ async def halacha_equivalents_unlink(halacha_id: str, other_id: str):
|
|||||||
return {"ok": await db.unlink_equivalent_halachot(hid, oid)}
|
return {"ok": await db.unlink_equivalent_halachot(hid, oid)}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Gold-set tagging (#81.7 / #81.8) ─────────────────────────────────────────
|
||||||
|
|
||||||
|
class GoldsetSampleRequest(BaseModel):
    """Request body of POST /api/goldset/sample.

    All three fields are forwarded unchanged to ``db.goldset_create_sample``.
    """

    # Number of halachot to sample.
    n: int = 150
    # Name of the tagging batch to create or extend.
    batch: str = "default"
    # Forwarded as the ``reset`` argument of the sampler.
    reset: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class GoldsetTagRequest(BaseModel):
    """Request body of PATCH /api/goldset/{goldset_id}.

    The three tag fields default to ``None`` and are forwarded unchanged to
    ``db.goldset_tag`` together with ``tagged_by``.
    """

    # Human answer to "is this a real holding?".
    is_holding: bool | None = None
    # Human-chosen rule type; validated by the endpoint when non-empty.
    correct_type: str | None = None
    # Human answer to "is the supporting quote complete?".
    quote_complete: bool | None = None
    # Who made the tag.
    tagged_by: str = "chair"
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/goldset")
async def goldset_list_ep(batch: str = "default"):
    """Return the tagging queue of ``batch``.

    The response is ``{"items": [...], "batch": batch}``, where ``items`` is
    whatever ``db.goldset_list`` returns for that batch.
    """
    items = await db.goldset_list(batch)
    return {"items": items, "batch": batch}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/goldset/sample")
async def goldset_sample_ep(req: GoldsetSampleRequest):
    """Create or extend a stratified gold-set batch for tagging (#81.7).

    Passes the request fields to ``db.goldset_create_sample`` and returns its
    result unchanged.
    """
    result = await db.goldset_create_sample(
        n=req.n,
        batch=req.batch,
        reset=req.reset,
    )
    return result
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/goldset/score")
async def goldset_score_ep(batch: str = "default"):
    """Score the extraction validators against the human tags (#81.8).

    Returns the result of ``db.goldset_score`` for ``batch`` unchanged.
    """
    score = await db.goldset_score(batch)
    return score
|
||||||
|
|
||||||
|
|
||||||
|
@app.patch("/api/goldset/{goldset_id}")
async def goldset_tag_ep(goldset_id: str, req: GoldsetTagRequest):
    """Save one human tag on a gold-set item.

    Raises:
        HTTPException(400): ``goldset_id`` is not a valid UUID, or
            ``correct_type`` is non-empty and not one of the accepted types.
        HTTPException(404): no gold-set item has that id.

    Returns:
        ``{"ok": True}`` once the tag has been stored.
    """
    allowed_types = (
        "binding", "interpretive", "obiter", "application", "procedural", "persuasive",
    )
    try:
        gid = UUID(goldset_id)
    except ValueError:
        # The parse failure adds nothing for the client; drop it from the chain.
        raise HTTPException(400, "מזהה לא תקין") from None
    # An empty or missing correct_type skips validation and is passed through.
    if req.correct_type and req.correct_type not in allowed_types:
        raise HTTPException(400, "correct_type לא תקין")
    row = await db.goldset_tag(
        gid, is_holding=req.is_holding, correct_type=req.correct_type,
        quote_complete=req.quote_complete, tagged_by=req.tagged_by,
    )
    if not row:
        raise HTTPException(404, "פריט לא נמצא")
    return {"ok": True}
|
||||||
|
|
||||||
|
|
||||||
@app.patch("/api/halachot/{halacha_id}")
|
@app.patch("/api/halachot/{halacha_id}")
|
||||||
async def halacha_update(halacha_id: str, req: HalachaUpdateRequest):
|
async def halacha_update(halacha_id: str, req: HalachaUpdateRequest):
|
||||||
"""Approve / reject / edit a halacha. Used by the chair review queue."""
|
"""Approve / reject / edit a halacha. Used by the chair review queue."""
|
||||||
|
|||||||
Reference in New Issue
Block a user