All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m35s
Six independent sub-tasks dispatched in parallel; aggregated here. ## #33 — Hide case_name column library-list-panel.tsx: `<TableHead>` + `<TableCell>` for "שם" get `className="hidden"` in both Court and Committee row variants. DB column preserved for future use. ## #47 — Audit script periodic New scripts/audit_corpus_integrity.py — 3 SQL checks (external+ערר prefix, internal missing chair/district, cases.practice_area enum) + CEO wakeup on violations + cron `0 7 * * *`. First run: 0 issues. ## #48 — Parent-doc retrieval (gated, default off) Schema V17: precedent_chunks.parent_chunk_id + chunk_role ('child'|'parent'). New chunker.chunk_document_hierarchical() — section-aware parents (~1500 tokens) containing ~5 overlapping children (~300 tokens each). New db.store_precedent_chunks_hierarchical two-pass writer. Search SQL (semantic + lexical) LEFT-JOIN parent and swap content + dedupe by parent_chunk_id when flag on. Toggle: PARENT_DOC_RETRIEVAL_ENABLED + PARENT_DOC_{CHILD,PARENT}_SIZE_TOKENS. Backfill ~3min and ~$0.20 — deferred to follow-up. ## #49 — Multimodal backfill New scripts/backfill_multimodal_precedents.py with token-matching case_number ↔ source files (PDF + DOCX via PyMuPDF). Ran in container: 26 precedents embedded, 503 pages, $0.21, 0 errors. precedent_image_embeddings grew 3 → 29 rows. 44 remaining are style_corpus-migrated rows (no source file on disk) — will catch up when re-uploaded. ## #50 — Closed-loop feedback + nDCG Schema V18: search_logs + search_relevance_feedback. New telemetry.py with fire-and-forget log_search_bg (p50 = 0.002ms — zero overhead) + auto-infer_relevance_from_citations (reads case drafts → marks score=3 when cited precedent appears in past search top-K). Hooks added to 5 search paths. scripts/compute_ndcg.py for aggregation. Two admin API endpoints (GET /api/admin/rag-metrics + POST .../infer). Dashboard UI deferred — API is enough for now. 
## #51 — Halacha quality monitoring

New scripts/monitor_halacha_quality.py — baseline avg confidence (trusted=0.849, all=0.833, pending=0.694) with rolling-window drift detection. Default alert threshold: 5%. Exits non-zero on alert for cron integration. Recommended schedule: `0 8 * * 1` (weekly, Monday 8am).

## Bonus: 230 unlinked citations → missing_precedents

Bulk-imported 230 distinct unlinked citations from precedent_internal_citations into missing_precedents with status='open', party='committee', and notes listing the source citers. Top candidate: ע"א 3213/97 (cited 5x). Total open missing_precedents now 237.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
392 lines
13 KiB
Python
392 lines
13 KiB
Python
"""RAG retrieval telemetry — closed-loop feedback (TaskMaster #50).
|
|
|
|
Logs every semantic search call so we can compute nDCG@10 over time,
|
|
spot retrieval drift, and feed the rerank training set.
|
|
|
|
Design notes
|
|
------------
|
|
- **All writes are fire-and-forget**: callers wrap us in ``try/except``
|
|
but we also swallow our own DB errors so a telemetry hiccup can never
|
|
fail a search. The log itself is also written via a detached task —
|
|
the search returns to the caller immediately and the row lands in
|
|
the DB on the side.
|
|
|
|
- **search_decisions / search_case_documents** return document chunks
|
|
from active cases, not ``case_law`` rows. Their telemetry rows leave
|
|
``top_case_law_ids`` empty; nDCG aggregation ignores them.
|
|
|
|
- **Auto-inferred feedback**: once a final decision is exported, we
|
|
scan its ``decision_paragraphs.citations`` JSONB, pull the
|
|
``case_law_id`` values, and mark them as ``relevance_score=3`` on
|
|
any search_log for the same case where the precedent appeared in
|
|
the top-K. This gives us a "cited == relevant" ground truth signal
|
|
without asking the chair to label results by hand.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Any, Iterable
|
|
from uuid import UUID
|
|
|
|
from legal_mcp.services import db
|
|
|
|
# Module-level logger; swallowed telemetry insert failures are reported here.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Accepted ``feedback_source`` values; ``infer_relevance_from_citations``
# raises ``ValueError`` for anything outside this set.
_VALID_SOURCES = {"cited_in_decision", "chair_marked", "auto_inferred"}
|
|
|
|
|
|
def _coerce_case_law_ids(results: Iterable[Any], limit: int = 10) -> list[UUID]:
    """Pull up to ``limit`` distinct ``case_law_id`` UUIDs from search results.

    Rows that are not dicts, rows without a ``case_law_id``, ``None`` values
    and values that do not parse as a UUID are skipped. Order of first
    appearance (= ranking) is preserved.

    Duplicates are detected on the *parsed* UUID rather than on the raw
    text, so the same id spelled differently (upper vs. lower case, with or
    without hyphens) occupies a single slot instead of crowding a distinct
    precedent out of the top-K.

    Args:
        results: search result rows, best match first.
        limit: maximum number of ids to return; values <= 0 yield ``[]``.

    Returns:
        The collected ids in ranking order.
    """
    out: list[UUID] = []
    seen: set[UUID] = set()
    for r in results:
        if len(out) >= limit:
            break
        if not isinstance(r, dict):
            continue
        raw = r.get("case_law_id")
        if raw is None:
            continue
        try:
            parsed = UUID(str(raw))
        except (ValueError, AttributeError):
            # Not a UUID at all (free text, ints, ...): ignore the row.
            continue
        if parsed in seen:
            continue
        seen.add(parsed)
        out.append(parsed)
    return out
|
|
|
|
|
|
async def _insert_log(
    *,
    search_type: str,
    query: str,
    practice_area: str | None,
    case_id: UUID | None,
    user_agent: str | None,
    result_count: int,
    top_case_law_ids: list[UUID],
    duration_ms: int | None,
) -> UUID | None:
    """Insert one ``search_logs`` row and return its id.

    Best-effort by design: every exception raised while acquiring the
    connection, preparing the values or running the statement is logged and
    swallowed, and ``None`` is returned instead.

    Empty strings for ``practice_area`` / ``user_agent`` and an empty
    ``top_case_law_ids`` list are stored as NULL.
    """
    insert_sql = """
        INSERT INTO search_logs (
            search_type, query, practice_area, case_id,
            user_agent, result_count, top_case_law_ids,
            duration_ms
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
        RETURNING id
    """
    try:
        pool = await db.get_pool()
        async with pool.acquire() as conn:
            params = (
                search_type,
                query[:2000],  # guard against pathologically long queries
                practice_area or None,
                case_id,
                user_agent or None,
                int(result_count),
                top_case_law_ids or None,
                duration_ms,
            )
            inserted = await conn.fetchrow(insert_sql, *params)
            if not inserted:
                return None
            return inserted["id"]
    except Exception:
        logger.exception("telemetry.log_search: insert failed (swallowed)")
        return None
|
|
|
|
|
|
async def log_search(
    *,
    search_type: str,
    query: str,
    results: Iterable[dict],
    duration_ms: int | None = None,
    practice_area: str | None = None,
    case_id: UUID | str | None = None,
    user_agent: str | None = None,
) -> UUID | None:
    """Record a search call. Never raises.

    Args:
        search_type: one of 'precedent_library', 'internal_decisions',
            'decisions', 'case_documents', 'similar_cases'.
        query: the raw user query.
        results: iterable of result dicts. We pull ``case_law_id`` from
            the first 10 to populate ``top_case_law_ids``.
        duration_ms: search latency in milliseconds.
        practice_area: optional filter applied to the search.
        case_id: optional case context (when the search was scoped to
            or triggered from a specific case). A value that does not
            parse as a UUID is recorded as NULL rather than rejected.
        user_agent: 'writer' / 'researcher' / 'analyst' / 'manual'.

    Returns:
        The ``search_logs.id`` UUID if the row was written, else None
        (including when ``results`` could not be materialised).
        Most callers ignore this; auto-inference uses it later via
        ``infer_relevance_from_citations``.
    """
    # Snapshot results immediately — callers may keep iterating. The
    # snapshot is guarded so that a bad ``results`` value (``None``, a
    # generator that raises) cannot break the "never raises" contract.
    try:
        snapshot = results if isinstance(results, list) else list(results)
    except Exception:
        logger.exception(
            "telemetry.log_search: could not snapshot results (swallowed)"
        )
        return None
    top_ids = _coerce_case_law_ids(snapshot, limit=10)

    case_uuid: UUID | None
    if case_id is None or isinstance(case_id, UUID):
        case_uuid = case_id
    else:
        try:
            case_uuid = UUID(str(case_id))
        except (ValueError, AttributeError):
            case_uuid = None

    return await _insert_log(
        search_type=search_type,
        query=query,
        practice_area=practice_area,
        case_id=case_uuid,
        user_agent=user_agent,
        result_count=len(snapshot),
        top_case_law_ids=top_ids,
        duration_ms=duration_ms,
    )
|
|
|
|
|
|
# Strong references to in-flight telemetry tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references can be garbage
# collected before it finishes; each task removes itself once done.
_BACKGROUND_LOG_TASKS: set[asyncio.Task] = set()


def log_search_bg(
    *,
    search_type: str,
    query: str,
    results: Iterable[dict],
    duration_ms: int | None = None,
    practice_area: str | None = None,
    case_id: UUID | str | None = None,
    user_agent: str | None = None,
) -> None:
    """Fire-and-forget variant. Schedules the insert as a detached task.

    Use this from hot search paths so the caller returns to the user
    immediately. Errors are logged inside ``log_search``.

    When there is no running event loop (synchronous caller) telemetry is
    skipped and ``results`` is left untouched. A ``results`` value that
    cannot be copied into a list is logged and skipped rather than raised
    into the search path.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop — caller is sync. Best-effort: skip telemetry.
        return

    # Snapshot eagerly so the caller can mutate/iterate results freely.
    try:
        snapshot = list(results)
    except Exception:
        logger.exception(
            "telemetry.log_search_bg: could not snapshot results (swallowed)"
        )
        return

    task = loop.create_task(
        log_search(
            search_type=search_type,
            query=query,
            results=snapshot,
            duration_ms=duration_ms,
            practice_area=practice_area,
            case_id=case_id,
            user_agent=user_agent,
        )
    )
    # Keep the task alive until it completes, then drop the reference.
    _BACKGROUND_LOG_TASKS.add(task)
    task.add_done_callback(_BACKGROUND_LOG_TASKS.discard)
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
|
|
# Auto-inferred relevance feedback
|
|
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _extract_citations_from_jsonb(citations: Any) -> list[UUID]:
    """Parse ``decision_paragraphs.citations`` JSONB into a UUID list.

    Stored shape: ``[{"case_law_id": "...", "text": "...", "type": ...}]``.
    Tolerates string / bytes form (asyncpg returns JSON text when the
    column registration didn't auto-decode).

    Anything that is not a JSON array yields ``[]``. Items that are not
    dicts, lack a truthy ``case_law_id`` or carry a value that does not
    parse as a UUID are skipped. Duplicates are detected on the parsed UUID,
    so the same id written in different spellings is returned once. Order of
    first appearance is preserved.
    """
    import json as _json

    if not citations:
        return []

    if isinstance(citations, (bytes, bytearray)):
        try:
            citations = citations.decode("utf-8")
        except UnicodeDecodeError:
            return []
    if isinstance(citations, str):
        try:
            citations = _json.loads(citations)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return []

    if not isinstance(citations, list):
        return []

    out: list[UUID] = []
    seen: set[UUID] = set()
    for item in citations:
        if not isinstance(item, dict):
            continue
        raw = item.get("case_law_id")
        if not raw:
            continue
        try:
            parsed = UUID(str(raw))
        except (ValueError, AttributeError):
            continue
        if parsed in seen:
            continue
        seen.add(parsed)
        out.append(parsed)
    return out
|
|
|
|
|
|
async def _gather_cited_case_law_ids(case_id: UUID) -> list[UUID]:
    """Collect the distinct ``case_law_id`` values cited in a case.

    Reads the ``citations`` column of every decision paragraph that belongs
    to a decision of ``case_id`` and has a non-empty citations array, parses
    each one with ``_extract_citations_from_jsonb`` and returns the ids in
    order of first appearance.
    """
    citations_sql = """
        SELECT dp.citations
        FROM decision_paragraphs dp
        JOIN decision_blocks db ON db.id = dp.block_id
        JOIN decisions d ON d.id = db.decision_id
        WHERE d.case_id = $1
          AND dp.citations IS NOT NULL
          AND jsonb_array_length(dp.citations) > 0
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        paragraph_rows = await conn.fetch(citations_sql, case_id)

    # Keyed by the id's string form; dict insertion order keeps the
    # first-seen ordering while dropping repeats.
    unique: dict[str, UUID] = {}
    for paragraph in paragraph_rows:
        for cited_id in _extract_citations_from_jsonb(paragraph["citations"]):
            unique.setdefault(str(cited_id), cited_id)
    return list(unique.values())
|
|
|
|
|
|
async def infer_relevance_from_citations(
    case_id: UUID | str,
    *,
    relevance_score: int = 3,
    feedback_source: str = "cited_in_decision",
) -> dict:
    """Turn a case's citations into relevance feedback on its past searches.

    Every precedent cited in the case's decision paragraphs is looked up in
    the ``search_logs`` rows recorded for the same case; wherever it appears
    in ``top_case_law_ids`` a ``search_relevance_feedback`` row is written
    with its 1-based rank.

    Re-running is safe: the insert uses
    ``ON CONFLICT (search_log_id, case_law_id, feedback_source) DO NOTHING``,
    so existing feedback rows are not duplicated.

    Args:
        case_id: the case to process, as a UUID or its string form.
        relevance_score: score to record; must be one of 0, 1, 2, 3.
        feedback_source: must be a member of ``_VALID_SOURCES``.

    Returns:
        ``{"cited_precedents": int, "feedback_rows_inserted": int,
        "searches_matched": int}``.

    Raises:
        ValueError: on an out-of-range ``relevance_score``, an unknown
            ``feedback_source`` or a ``case_id`` that is not a valid UUID.
    """
    if relevance_score not in (0, 1, 2, 3):
        raise ValueError("relevance_score must be in 0..3")
    if feedback_source not in _VALID_SOURCES:
        raise ValueError(f"feedback_source must be one of {_VALID_SOURCES!r}")

    if isinstance(case_id, UUID):
        case_uuid = case_id
    else:
        case_uuid = UUID(str(case_id))

    cited = await _gather_cited_case_law_ids(case_uuid)
    if not cited:
        return {
            "cited_precedents": 0,
            "feedback_rows_inserted": 0,
            "searches_matched": 0,
        }

    find_logs_sql = """
        SELECT id, top_case_law_ids
        FROM search_logs
        WHERE case_id = $1
          AND top_case_law_ids IS NOT NULL
          AND $2 = ANY(top_case_law_ids)
    """
    insert_feedback_sql = """
        INSERT INTO search_relevance_feedback (
            search_log_id, case_law_id, rank,
            relevance_score, feedback_source
        ) VALUES ($1, $2, $3, $4, $5)
        ON CONFLICT (search_log_id, case_law_id, feedback_source)
        DO NOTHING
    """

    rows_written = 0
    matched_log_ids: set[str] = set()

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        # One lookup per cited precedent: which logged searches for this
        # case surfaced it, and at what position.
        for precedent_id in cited:
            log_rows = await conn.fetch(find_logs_sql, case_uuid, precedent_id)
            for log_row in log_rows:
                # asyncpg returns uuid[] as list[UUID]
                ranked_ids = log_row["top_case_law_ids"] or []
                if precedent_id not in ranked_ids:
                    continue
                position = ranked_ids.index(precedent_id) + 1
                status = await conn.execute(
                    insert_feedback_sql,
                    log_row["id"],
                    precedent_id,
                    position,
                    relevance_score,
                    feedback_source,
                )
                # The status tag is 'INSERT 0 1' for a new row and
                # 'INSERT 0 0' when the conflict clause skipped it.
                if status.endswith(" 1"):
                    rows_written += 1
                matched_log_ids.add(str(log_row["id"]))

    return {
        "cited_precedents": len(cited),
        "feedback_rows_inserted": rows_written,
        "searches_matched": len(matched_log_ids),
    }
|
|
|
|
|
|
async def infer_relevance_for_all_finalized_cases(limit: int | None = None) -> dict:
    """Bulk-run auto-inference for every finalized case.

    Selects the distinct cases whose ``status`` is ``'final'`` or
    ``'exported'`` and that have at least one ``decisions`` row, then runs
    ``infer_relevance_from_citations`` on each, one after another.

    Useful for back-filling after V18 schema lands and a few decisions
    have already been written. Cases with no cited precedents are still
    counted in ``cases_processed`` but contribute zero to the other totals.

    Args:
        limit: maximum number of cases to process. ``None``, zero and
            negative values all mean "no limit".

    Returns:
        ``{"cases_processed": int, "cited_precedents": int,
        "feedback_rows_inserted": int, "searches_matched": int}``.
    """
    sql = """
        SELECT DISTINCT c.id
        FROM cases c
        JOIN decisions d ON d.case_id = c.id
        WHERE c.status IN ('final', 'exported')
    """
    # Decide once whether the query is limited, so the SQL text and the
    # bound arguments can never disagree (a negative limit used to bind an
    # argument to a statement that had no ``$1`` placeholder).
    args: list[int] = []
    if limit is not None and limit > 0:
        sql += " LIMIT $1"
        args.append(limit)

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(sql, *args)

    totals = {
        "cases_processed": 0,
        "cited_precedents": 0,
        "feedback_rows_inserted": 0,
        "searches_matched": 0,
    }
    for r in rows:
        stats = await infer_relevance_from_citations(r["id"])
        totals["cases_processed"] += 1
        for key in ("cited_precedents", "feedback_rows_inserted", "searches_matched"):
            totals[key] += stats[key]
    return totals
|