feat(halacha-triage): quality-gated + prioritized review queue + metrics (#84)
Backend for the halacha approval-queue triage (#84). The keyboard UI, batch actions and defer/reject (#84.4–6) already shipped; this adds the gating, prioritization and metrics the queue was missing.

db.list_halachot — two opt-in triage controls:
* exclude_low_quality (#84.1): drop items carrying ANY quality_flag (application / quote_unverified / truncated / non_decision / thin / nli_unsupported / near_duplicate) — they belong in a 'needs extraction fix' bucket, not the chair's approve queue.
* order_by_priority (#84.3): active-learning order — negatively-treated first, then most-uncertain (lowest confidence), then oldest — instead of FIFO, so the highest-value decisions surface first.

halachot_pending (MCP) — now gated + prioritized BY DEFAULT; include_low_quality=true reveals the needs-fix bucket. The agent review path benefits immediately.

GET /api/halachot — same two params, default OFF (non-breaking; the UI opts in).

metrics.halacha_backlog (#84.7) — splits pending into clean vs flagged, adds deferred, reviewed_total, approve_ratio, and a pending_by_flag breakdown, so the backlog distinguishes real review work from extraction noise.

Postponed to a follow-up (documented): #84.2 near-duplicate cluster cards and wiring the UI fetch to the new params require frontend work + an api:types regen AFTER this deploys (the new query params aren't in prod's OpenAPI until then) — a clean follow-up. The backend fully supports both now.

Verified against the live DB (read-only):
- pending 177 → gated-clean 110, 0 flagged items leak into the clean queue.
- priority order surfaces the lowest-confidence items first (0.55, 0.55, ...).
- backlog: pending_clean=110 / pending_flagged=67 / approve_ratio=0.916, pending_by_flag={nli_unsupported:59, quote_unverified:3, thin:3, truncated:2}.
- pytest tests/test_halacha_quality.py — 52 passed (no regression).
Invariants: G1 (gate at source — SQL filter, not post-hoc); G2 (no parallel path — same list_halachot); §6 (flagged items routed to a bucket, never dropped).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3792,7 +3792,19 @@ async def list_halachot(
|
||||
practice_area: str | None = None,
|
||||
limit: int = 200,
|
||||
offset: int = 0,
|
||||
exclude_low_quality: bool = False,
|
||||
order_by_priority: bool = False,
|
||||
) -> list[dict]:
|
||||
"""List halachot with optional triage controls (#84).
|
||||
|
||||
exclude_low_quality — drop items carrying ANY quality_flag (application /
|
||||
truncated / quote_unverified / non_decision / thin /
|
||||
nli_unsupported / near_duplicate). These belong in a 'needs extraction
|
||||
fix' bucket, not the chair's approve queue (#84.1).
|
||||
order_by_priority — replace FIFO with an active-learning order (#84.3):
|
||||
negatively-treated first, then most-uncertain (lowest confidence), then
|
||||
oldest — so the chair sees the highest-value decisions first.
|
||||
"""
|
||||
pool = await get_pool()
|
||||
conditions = []
|
||||
params: list = []
|
||||
@@ -3809,7 +3821,16 @@ async def list_halachot(
|
||||
conditions.append(f"${idx} = ANY(h.practice_areas)")
|
||||
params.append(practice_area)
|
||||
idx += 1
|
||||
if exclude_low_quality:
|
||||
# a clean item has an empty/NULL quality_flags array
|
||||
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
|
||||
where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
|
||||
order_sql = (
|
||||
"ORDER BY corroboration_negative DESC, h.confidence ASC NULLS LAST, "
|
||||
"h.created_at ASC"
|
||||
if order_by_priority
|
||||
else "ORDER BY h.case_law_id, h.halacha_index"
|
||||
)
|
||||
params.extend([limit, offset])
|
||||
sql = f"""
|
||||
SELECT h.id, h.case_law_id, h.halacha_index, h.rule_statement,
|
||||
@@ -3837,7 +3858,7 @@ async def list_halachot(
|
||||
GROUP BY halacha_id
|
||||
) cor ON cor.halacha_id = h.id
|
||||
{where_sql}
|
||||
ORDER BY h.case_law_id, h.halacha_index
|
||||
{order_sql}
|
||||
LIMIT ${idx} OFFSET ${idx + 1}
|
||||
"""
|
||||
rows = await pool.fetch(sql, *params)
|
||||
|
||||
@@ -117,12 +117,33 @@ async def halacha_backlog(conn) -> dict:
|
||||
oldest = await conn.fetchval(
|
||||
"SELECT MIN(created_at) FROM halachot WHERE review_status = 'pending_review'"
|
||||
)
|
||||
# #84.7 — split the pending bucket: how many are genuine candidates (clean)
|
||||
# vs flagged 'needs extraction fix', and the breakdown by flag, so the chair
|
||||
# sees how much of the backlog is real review vs extraction noise.
|
||||
pending_clean = await conn.fetchval(
|
||||
"SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review' "
|
||||
"AND COALESCE(array_length(quality_flags, 1), 0) = 0"
|
||||
)
|
||||
flag_rows = await conn.fetch(
|
||||
"SELECT flag, COUNT(*) AS n FROM ("
|
||||
" SELECT unnest(quality_flags) AS flag FROM halachot "
|
||||
" WHERE review_status = 'pending_review'"
|
||||
") t GROUP BY flag ORDER BY n DESC"
|
||||
)
|
||||
pending_total = counts.get("pending_review", 0)
|
||||
reviewed = counts.get("approved", 0) + counts.get("rejected", 0) + counts.get("published", 0)
|
||||
return {
|
||||
"pending_review": counts.get("pending_review", 0),
|
||||
"pending_review": pending_total,
|
||||
"pending_clean": pending_clean, # real review candidates (#84.1)
|
||||
"pending_flagged": pending_total - pending_clean, # needs-fix bucket
|
||||
"approved": counts.get("approved", 0),
|
||||
"rejected": counts.get("rejected", 0),
|
||||
"deferred": counts.get("deferred", 0),
|
||||
"published": counts.get("published", 0),
|
||||
"total": sum(counts.values()),
|
||||
"reviewed_total": reviewed,
|
||||
"approve_ratio": round(counts.get("approved", 0) / reviewed, 3) if reviewed else None,
|
||||
"pending_by_flag": {r["flag"]: r["n"] for r in flag_rows},
|
||||
"oldest_pending_at": oldest.isoformat() if oldest else None,
|
||||
}
|
||||
|
||||
|
||||
@@ -356,7 +356,22 @@ async def halacha_review(
|
||||
return _ok(row)
|
||||
|
||||
|
||||
async def halachot_pending(limit: int = 100) -> str:
|
||||
"""תור ההלכות הממתינות לאישור (review_status='pending_review')."""
|
||||
rows = await db.list_halachot(review_status="pending_review", limit=limit)
|
||||
async def halachot_pending(limit: int = 100, include_low_quality: bool = False) -> str:
    """Return the queue of halachot awaiting approval (review_status='pending_review').

    By default (#84.1, #84.3) the queue is **filtered**: halachot carrying any
    quality flag (application / unverified quote / truncated / obiter / thin
    restatement / unsupported / near-duplicate) are hidden, because they belong
    to the 'needs extraction fix' bucket rather than the approval queue. The
    queue is also **ordered by priority**: negatively-treated items first, then
    the most uncertain, then the oldest.

    Args:
        limit: Maximum number of items to return.
        include_low_quality: Pass True to also reveal quality-flagged items
            (the 'needs fix' bucket).
    """
    # The public flag is phrased as "include"; the db layer expects "exclude".
    hide_flagged = not include_low_quality
    pending = await db.list_halachot(
        review_status="pending_review",
        order_by_priority=True,
        exclude_low_quality=hide_flagged,
        limit=limit,
    )
    return _ok(pending)
|
||||
|
||||
Reference in New Issue
Block a user