feat(goldset): AI second-opinion per item (QA aid) — compare vs human tag
The chair wanted an independent recommendation beside each tag, so that he can reconsider his own judgments. This commit adds a NON-ground-truth AI second-opinion:
- schema: halacha_goldset.ai_is_holding / ai_correct_type / ai_rationale / ai_generated_at (additive).
- db.goldset_set_ai_recommendation; goldset_list now returns the ai_* fields.
- scripts/goldset_ai_recommend.py — a local claude_session judges is_holding + type + a one-line rationale per item, INDEPENDENTLY (using its own legal rubric). It is independent of the rule-based validators that #81.8 measures → no circularity. Never auto-applied; QA aid only.
- web-ui: each card shows "🤖 המלצת AI: הלכה/לא · type" + rationale and an agreement/disagreement chip vs the human tag (amber on disagree); a "⚠ אי-הסכמות AI (N)" filter to review only the conflicts.
Methodology note kept explicit: the human stays the ground truth; the AI is a prompt to reconsider, not to copy.
Verified: tsc --noEmit 0; generator stores recs and flags disagreements with existing human tags.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -67,6 +67,16 @@ function isTagged(it: GoldsetItem): boolean {
|
||||
return it.is_holding !== null && it.quote_complete !== null && !!it.correct_type;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the AI second-opinion conflicts with the human tag.
 *
 * An item counts as a disagreement only when an AI recommendation exists
 * (`ai_generated_at` is set) and at least one of the following holds:
 *  - both sides carry an `is_holding` verdict and the verdicts differ;
 *  - both sides name a type and the two types differ.
 *
 * A side that has not answered yet (null/undefined verdict, empty type) is
 * never treated as a conflict, so untagged items are not flagged.
 *
 * @param it - Gold-set item holding the human tag and the `ai_*` fields.
 * @returns `true` when the verdict or the type conflicts, otherwise `false`.
 */
function aiDisagrees(it: GoldsetItem): boolean {
  // No recommendation was generated for this item: nothing to compare.
  if (!it.ai_generated_at) return false;

  // `!= null` intentionally matches both null and undefined, so a field that
  // is absent from the payload counts as "not answered" rather than as a
  // value that differs from the other side.
  const holdDiff =
    it.is_holding != null &&
    it.ai_is_holding != null &&
    it.is_holding !== it.ai_is_holding;

  // Truthiness check: an empty-string type means "no type chosen".
  const typeDiff =
    !!it.correct_type &&
    !!it.ai_correct_type &&
    it.correct_type !== it.ai_correct_type;

  return holdDiff || typeDiff;
}
|
||||
|
||||
// ─── Score panel ──────────────────────────────────────────────────────────────
|
||||
|
||||
function ScorePanel({ batch }: { batch: string }) {
|
||||
@@ -248,6 +258,36 @@ function TagCard({
|
||||
“{it.supporting_quote}”
|
||||
</blockquote>
|
||||
|
||||
{it.ai_generated_at && (() => {
|
||||
const aiType = TYPES.find((t) => t.value === it.ai_correct_type)?.label ?? it.ai_correct_type;
|
||||
const holdDisagree = it.is_holding !== null && it.ai_is_holding !== null
|
||||
&& it.is_holding !== it.ai_is_holding;
|
||||
const typeDisagree = !!it.correct_type && !!it.ai_correct_type
|
||||
&& it.correct_type !== it.ai_correct_type;
|
||||
const anyTag = it.is_holding !== null || !!it.correct_type;
|
||||
return (
|
||||
<div className={`rounded-md border p-2.5 text-[0.78rem] space-y-1
|
||||
${holdDisagree ? "border-amber-400 bg-amber-50" : "border-rule bg-rule-soft/20"}`} dir="rtl">
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<span className="font-semibold text-navy">🤖 המלצת AI:</span>
|
||||
<span>{it.ai_is_holding ? "הלכה" : "לא הלכה"}</span>
|
||||
{aiType && <span className="text-ink-muted">· {aiType}</span>}
|
||||
{anyTag && (
|
||||
<span className={`ms-auto text-[0.7rem] px-1.5 py-0.5 rounded
|
||||
${holdDisagree || typeDisagree
|
||||
? "bg-amber-100 text-amber-800"
|
||||
: "bg-emerald-50 text-emerald-700"}`}>
|
||||
{holdDisagree ? "⚠ חולק על 'הלכה/לא'"
|
||||
: typeDisagree ? "⚠ חולק על הסוג"
|
||||
: "✓ מסכים איתך"}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{it.ai_rationale && <div className="text-ink-soft leading-relaxed">{it.ai_rationale}</div>}
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
|
||||
<div className="grid gap-3 sm:grid-cols-3 pt-1 border-t border-rule-soft">
|
||||
{/* is_holding */}
|
||||
<div>
|
||||
@@ -308,11 +348,13 @@ export function GoldsetPanel() {
|
||||
const createSample = useCreateGoldsetSample(batch);
|
||||
const [focusedId, setFocusedId] = useState<string | null>(null);
|
||||
const [hideTagged, setHideTagged] = useState(false);
|
||||
const [disagreeOnly, setDisagreeOnly] = useState(false);
|
||||
const [sourceFilter, setSourceFilter] =
|
||||
useState<"all" | "court_ruling" | "appeals_committee">("all");
|
||||
|
||||
const items = useMemo(() => data?.items ?? [], [data]);
|
||||
const taggedCount = items.filter(isTagged).length;
|
||||
const disagreeCount = items.filter(aiDisagrees).length;
|
||||
const sourceCounts = useMemo(() => ({
|
||||
court_ruling: items.filter((i) => i.source_type === "court_ruling").length,
|
||||
appeals_committee: items.filter((i) => i.source_type === "appeals_committee").length,
|
||||
@@ -321,11 +363,12 @@ export function GoldsetPanel() {
|
||||
let v = items;
|
||||
if (sourceFilter !== "all") v = v.filter((i) => i.source_type === sourceFilter);
|
||||
if (hideTagged) v = v.filter((i) => !isTagged(i));
|
||||
if (disagreeOnly) v = v.filter(aiDisagrees);
|
||||
// group-sort: כל פסקי-הדין יחד, ואז כל החלטות ועדת-הערר (הפרדה ברורה).
|
||||
const order = (s: string | null) =>
|
||||
s === "court_ruling" ? 0 : s === "appeals_committee" ? 1 : 2;
|
||||
return [...v].sort((a, b) => order(a.source_type) - order(b.source_type));
|
||||
}, [items, hideTagged, sourceFilter]);
|
||||
}, [items, hideTagged, sourceFilter, disagreeOnly]);
|
||||
|
||||
const focused = focusedId ? visible.find((i) => i.id === focusedId) ?? null : null;
|
||||
|
||||
@@ -424,7 +467,14 @@ export function GoldsetPanel() {
|
||||
{" "}· הלכה <kbd className="bg-rule-soft px-1.5 rounded">H</kbd> / לא <kbd className="bg-rule-soft px-1.5 rounded">N</kbd>
|
||||
{" "}· ציטוט שלם <kbd className="bg-rule-soft px-1.5 rounded">C</kbd> / קטוע <kbd className="bg-rule-soft px-1.5 rounded">X</kbd>
|
||||
</span>
|
||||
<Button size="sm" variant="ghost" className="ms-auto" onClick={() => setHideTagged((v) => !v)}>
|
||||
{disagreeCount > 0 && (
|
||||
<Button size="sm" variant={disagreeOnly ? "default" : "ghost"}
|
||||
className={disagreeOnly ? "ms-auto bg-amber-500 text-white hover:bg-amber-600" : "ms-auto text-amber-700"}
|
||||
onClick={() => setDisagreeOnly((v) => !v)}>
|
||||
⚠ אי-הסכמות AI ({disagreeCount})
|
||||
</Button>
|
||||
)}
|
||||
<Button size="sm" variant="ghost" className={disagreeCount > 0 ? "" : "ms-auto"} onClick={() => setHideTagged((v) => !v)}>
|
||||
{hideTagged ? "הצג הכל" : "הסתר מתויגים"}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
@@ -29,6 +29,11 @@ export type GoldsetItem = {
|
||||
case_number: string | null;
|
||||
case_name: string | null;
|
||||
source_type: string | null; // 'court_ruling' | 'appeals_committee' | ''
|
||||
// AI second-opinion (QA aid — independent, not ground truth, not auto-applied)
|
||||
ai_is_holding: boolean | null;
|
||||
ai_correct_type: string;
|
||||
ai_rationale: string;
|
||||
ai_generated_at: string | null;
|
||||
};
|
||||
|
||||
export type GoldsetScore = {
|
||||
|
||||
Reference in New Issue
Block a user