feat: external precedent library with auto halacha extraction
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m27s

Adds a third corpus of legal authority distinct from style_corpus
(Daphna's prior decisions for voice) and case_precedents (chair-attached
quotes per case). The new corpus holds chair-uploaded court rulings and
other appeals committee decisions, with binding rules (הלכות) extracted
automatically and queued for chair approval.

Pipeline (web/app.py + services/precedent_library.py):
file → extract → chunk → Voyage embed → halacha_extractor → store +
publish progress over the existing Redis SSE channel.

Schema V7 (services/db.py): extends case_law with source_kind +
extraction status fields under a CHECK constraint pinning practice_area
to the three appeals committee domains (rishuy_uvniya, betterment_levy,
compensation_197). New precedent_chunks (vector(1024)) and halachot
tables (vector(1024) over rule_statement, IVFFlat indexes, gin on
practice_areas/subject_tags). Halachot start as pending_review; only
approved/published rows are visible to search_precedent_library.

Agents: legal-writer, legal-researcher, legal-analyst, legal-ceo,
legal-qa get search_precedent_library. legal-writer prompt explains
the three-corpus distinction and CREAC use; legal-qa now verifies that
every cited halacha resolves to an approved row in the corpus.

UI: /precedents page with four tabs — library / semantic search /
pending review (J/K nav, A/R/E shortcuts, badge count) / stats.
Reuses the existing upload-sheet progress + SSE pattern.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 08:38:18 +00:00
parent a6edb75bbf
commit 7ee90dce31
23 changed files with 3853 additions and 67 deletions

View File

@@ -518,6 +518,91 @@ CREATE INDEX IF NOT EXISTS idx_cases_archived ON cases(archived_at) WHERE archiv
"""
# ── V7: External Precedent Library + halacha extraction ──────────
# Chair-uploaded external court rulings and other appeals committee decisions
# become an authoritative law corpus. Distinct from style_corpus (Daphna's
# style) and case_precedents (chair-attached quotes scoped to a single case).
#
# The script is written to be re-runnable: columns, tables and indexes are
# created with IF NOT EXISTS, and the CHECK constraint is added inside a DO
# block that ignores duplicate_object.
#
# NOTE(review): the ivfflat indexes are created together with their (initially
# empty) tables. pgvector's guidance is to build IVFFlat indexes once the
# table holds data — confirm whether a later REINDEX is planned.
SCHEMA_V7_SQL = """
-- case_law extensions: distinguish chair-uploaded full rulings from
-- auto-extracted citation stubs, and track ingestion progress.
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_kind TEXT DEFAULT 'cited_only';
-- 'external_upload' (chair uploaded full ruling) | 'cited_only' (stub from
-- references_extractor) | 'nevo_seed' (future: auto-fetched from Nevo).
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS document_id UUID REFERENCES documents(id) ON DELETE SET NULL;
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS extraction_status TEXT DEFAULT 'pending';
-- 'pending' | 'processing' | 'completed' | 'failed'
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_status TEXT DEFAULT 'pending';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS headnote TEXT DEFAULT '';
-- chair-editable abstract shown in search results.
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_type TEXT DEFAULT '';
-- 'court_ruling' | 'appeals_committee'
-- practice_area is closed to the three appeals committee domains.
DO $$ BEGIN
ALTER TABLE case_law ADD CONSTRAINT case_law_practice_area_check
CHECK (practice_area IN ('', 'rishuy_uvniya', 'betterment_levy', 'compensation_197'));
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
CREATE INDEX IF NOT EXISTS idx_case_law_practice ON case_law(practice_area, appeal_subtype);
-- precedent_chunks: full-text chunks of an uploaded ruling, with embeddings.
-- Analog of document_chunks for case_law rows where source_kind='external_upload'.
CREATE TABLE IF NOT EXISTS precedent_chunks (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
section_type TEXT DEFAULT 'other',
-- intro | facts | legal_analysis | ruling | conclusion | other
page_number INTEGER,
embedding vector(1024),
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_case_law ON precedent_chunks(case_law_id);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_section ON precedent_chunks(case_law_id, section_type);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_vec
ON precedent_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
-- halachot: extracted binding rules. One halacha = one rule + verbatim quote.
-- Embedded separately for rule-precision semantic match (chunks centroid is
-- dominated by surrounding context). All halachot start as pending_review;
-- only approved/published rows are visible to search_precedent_library.
CREATE TABLE IF NOT EXISTS halachot (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
halacha_index INTEGER NOT NULL,
rule_statement TEXT NOT NULL,
rule_type TEXT DEFAULT 'binding',
-- binding | interpretive | procedural | obiter
reasoning_summary TEXT DEFAULT '',
supporting_quote TEXT NOT NULL,
page_reference TEXT DEFAULT '',
practice_areas TEXT[] DEFAULT '{}',
subject_tags TEXT[] DEFAULT '{}',
cites TEXT[] DEFAULT '{}',
confidence NUMERIC(3,2) DEFAULT 0.0,
quote_verified BOOLEAN DEFAULT FALSE,
review_status TEXT DEFAULT 'pending_review',
-- pending_review | approved | rejected | published
reviewer TEXT DEFAULT '',
reviewed_at TIMESTAMPTZ,
embedding vector(1024),
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_halachot_case_law ON halachot(case_law_id);
CREATE INDEX IF NOT EXISTS idx_halachot_status ON halachot(review_status);
CREATE INDEX IF NOT EXISTS idx_halachot_practice ON halachot USING gin(practice_areas);
CREATE INDEX IF NOT EXISTS idx_halachot_tags ON halachot USING gin(subject_tags);
CREATE INDEX IF NOT EXISTS idx_halachot_vec
ON halachot USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
"""
async def init_schema() -> None:
pool = await get_pool()
async with pool.acquire() as conn:
@@ -528,7 +613,8 @@ async def init_schema() -> None:
await conn.execute(SCHEMA_V4_SQL)
await conn.execute(SCHEMA_V5_SQL)
await conn.execute(SCHEMA_V6_SQL)
logger.info("Database schema initialized (v1-v6)")
await conn.execute(SCHEMA_V7_SQL)
logger.info("Database schema initialized (v1-v7)")
# ── Case CRUD ───────────────────────────────────────────────────────
@@ -1518,3 +1604,590 @@ async def detect_appraiser_conflicts(case_id: UUID) -> list[dict]:
"entries": entries,
})
return conflicts
# ── V7: External precedent library + halachot ─────────────────────
def _row_to_case_law(row: asyncpg.Record) -> dict:
"""Normalize a case_law row, parsing subject_tags JSONB to list."""
d = dict(row)
if isinstance(d.get("subject_tags"), str):
try:
d["subject_tags"] = json.loads(d["subject_tags"])
except (TypeError, ValueError):
d["subject_tags"] = []
if d.get("date") is not None:
d["date"] = d["date"].isoformat()
return d
async def get_case_law(case_law_id: UUID) -> dict | None:
    """Load a single ``case_law`` row by primary key.

    Returns the row normalized through ``_row_to_case_law``, or ``None``
    when no row has the given id.
    """
    pool = await get_pool()
    record = await pool.fetchrow(
        "SELECT * FROM case_law WHERE id = $1", case_law_id,
    )
    if not record:
        return None
    return _row_to_case_law(record)
async def get_case_law_by_citation(case_number: str) -> dict | None:
    """Load a ``case_law`` row by exact ``case_number`` match.

    Returns the row normalized through ``_row_to_case_law``, or ``None``
    when nothing matches.
    """
    pool = await get_pool()
    record = await pool.fetchrow(
        "SELECT * FROM case_law WHERE case_number = $1", case_number,
    )
    if not record:
        return None
    return _row_to_case_law(record)
async def create_external_case_law(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    headnote: str = "",
    key_quote: str = "",
    source_url: str = "",
    source_type: str = "",
    precedent_level: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
) -> dict:
    """Upsert a chair-uploaded external precedent into ``case_law``.

    The row is looked up by ``case_number``:

    * Found — the row is overwritten and marked
      ``source_kind='external_upload'``. This is how an auto-discovered
      ``cited_only`` stub gets promoted; note that the existing
      ``source_kind`` is not inspected, so a previous upload is overwritten
      the same way. ``court``, ``summary``, ``key_quote`` and ``source_url``
      keep their stored value when the new value is an empty string;
      ``date`` and ``document_id`` keep theirs when the new value is ``None``.
    * Not found — a new ``external_upload`` row is inserted.

    Either way ``extraction_status`` is set to ``'processing'`` and
    ``halacha_extraction_status`` to ``'pending'``.

    Returns:
        The stored row, normalized through ``_row_to_case_law``.
    """
    promote_sql = """
        UPDATE case_law SET
            case_name = $2,
            court = COALESCE(NULLIF($3, ''), court),
            date = COALESCE($4, date),
            practice_area = $5,
            appeal_subtype = $6,
            subject_tags = $7,
            summary = COALESCE(NULLIF($8, ''), summary),
            headnote = $9,
            key_quote = COALESCE(NULLIF($10, ''), key_quote),
            full_text = $11,
            source_url = COALESCE(NULLIF($12, ''), source_url),
            source_type = $13,
            precedent_level = $14,
            is_binding = $15,
            document_id = COALESCE($16, document_id),
            source_kind = 'external_upload',
            extraction_status = 'processing',
            halacha_extraction_status = 'pending'
        WHERE id = $1
        RETURNING *
    """
    insert_sql = """
        INSERT INTO case_law (
            case_number, case_name, court, date, subject_tags,
            summary, key_quote, full_text, source_url,
            source_kind, document_id, extraction_status,
            halacha_extraction_status, practice_area, appeal_subtype,
            headnote, source_type, precedent_level, is_binding
        ) VALUES (
            $1, $2, $3, $4, $5, $6, $7, $8, $9,
            'external_upload', $10, 'processing', 'pending',
            $11, $12, $13, $14, $15, $16
        )
        RETURNING *
    """

    pool = await get_pool()
    # subject_tags is handed to the database as a JSON-encoded string.
    encoded_tags = json.dumps(subject_tags or [], ensure_ascii=False)

    async with pool.acquire() as conn:
        prior = await conn.fetchrow(
            "SELECT id, source_kind FROM case_law WHERE case_number = $1",
            case_number,
        )
        if prior:
            # Argument order follows $1..$16 of the UPDATE statement.
            statement = promote_sql
            args = (
                prior["id"], case_name, court, decision_date,
                practice_area, appeal_subtype, encoded_tags, summary,
                headnote, key_quote, full_text, source_url, source_type,
                precedent_level, is_binding, document_id,
            )
        else:
            # Argument order follows $1..$16 of the INSERT statement.
            statement = insert_sql
            args = (
                case_number, case_name, court, decision_date, encoded_tags,
                summary, key_quote, full_text, source_url,
                document_id, practice_area, appeal_subtype, headnote,
                source_type, precedent_level, is_binding,
            )
        stored = await conn.fetchrow(statement, *args)

    return _row_to_case_law(stored)
async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
    """Patch metadata columns on one ``case_law`` row.

    Only these keyword names are honoured; anything else is silently
    dropped: case_name, court, date, practice_area, appeal_subtype,
    subject_tags, summary, headnote, key_quote, source_url, source_type,
    precedent_level, is_binding.

    ``subject_tags`` is JSON-encoded before being sent (``None``/empty
    becomes ``[]``).

    Returns:
        The updated row (normalized), the current row when no honoured
        field was supplied, or ``None`` when the id matches nothing.
    """
    allowed = {
        "case_name", "court", "date", "practice_area", "appeal_subtype",
        "subject_tags", "summary", "headnote", "key_quote", "source_url",
        "source_type", "precedent_level", "is_binding",
    }
    updates = {name: value for name, value in fields.items() if name in allowed}
    if not updates:
        return await get_case_law(case_law_id)

    pool = await get_pool()
    assignments: list[str] = []
    params: list = [case_law_id]  # $1 is always the row id
    for column, value in updates.items():
        if column == "subject_tags":
            value = json.dumps(value or [], ensure_ascii=False)
        params.append(value)
        # Column names come from the whitelist above, so interpolating them
        # into the statement is safe; values stay parameterized.
        assignments.append(f"{column} = ${len(params)}")

    sql = f"UPDATE case_law SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
    record = await pool.fetchrow(sql, *params)
    if not record:
        return None
    return _row_to_case_law(record)
async def set_case_law_extraction_status(case_law_id: UUID, status: str) -> None:
    """Write ``status`` into ``case_law.extraction_status`` for one row.

    The value is stored as given; no validation is performed here.
    """
    sql = "UPDATE case_law SET extraction_status = $2 WHERE id = $1"
    pool = await get_pool()
    await pool.execute(sql, case_law_id, status)
async def set_case_law_halacha_status(case_law_id: UUID, status: str) -> None:
    """Write ``status`` into ``case_law.halacha_extraction_status`` for one row.

    The value is stored as given; no validation is performed here.
    """
    sql = "UPDATE case_law SET halacha_extraction_status = $2 WHERE id = $1"
    pool = await get_pool()
    await pool.execute(sql, case_law_id, status)
async def list_external_case_law(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
    offset: int = 0,
) -> list[dict]:
    """List chair-uploaded precedents (``source_kind='external_upload'``).

    Every filter is optional; an empty string disables it.

    Args:
        practice_area: Exact match on ``practice_area``.
        court: Case-insensitive substring match on ``court``.
        precedent_level: Exact match on ``precedent_level``.
        source_type: Exact match on ``source_type``.
        search: Case-insensitive substring matched against case_number,
            case_name, summary and headnote.
        limit: Maximum number of rows.
        offset: Number of rows to skip.

    Returns:
        Rows ordered newest first, each normalized through
        ``_row_to_case_law`` and carrying ``halachot_count`` (all halachot)
        and ``approved_count`` (approved or published halachot).
    """
    pool = await get_pool()

    where: list[str] = ["source_kind = 'external_upload'"]
    params: list = []

    def bind(value) -> str:
        """Register a parameter and return its ``$n`` placeholder."""
        params.append(value)
        return f"${len(params)}"

    if practice_area:
        where.append(f"practice_area = {bind(practice_area)}")
    if court:
        court_ph = bind(f"%{court}%")
        where.append(f"court ILIKE {court_ph}")
    if precedent_level:
        where.append(f"precedent_level = {bind(precedent_level)}")
    if source_type:
        where.append(f"source_type = {bind(source_type)}")
    if search:
        # One placeholder is reused for all four columns.
        needle = bind(f"%{search}%")
        where.append(
            f"(case_number ILIKE {needle} OR case_name ILIKE {needle} "
            f"OR summary ILIKE {needle} OR headnote ILIKE {needle})"
        )

    where_sql = " AND ".join(where)
    limit_ph = bind(limit)
    offset_ph = bind(offset)

    sql = f"""
        SELECT id, case_number, case_name, court, date, practice_area,
               appeal_subtype, source_type, precedent_level, is_binding,
               summary, headnote, subject_tags, source_kind,
               extraction_status, halacha_extraction_status,
               created_at,
               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id) AS halachot_count,
               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id
                   AND h.review_status IN ('approved', 'published')) AS approved_count
        FROM case_law
        WHERE {where_sql}
        ORDER BY created_at DESC
        LIMIT {limit_ph} OFFSET {offset_ph}
    """
    records = await pool.fetch(sql, *params)
    return [_row_to_case_law(record) for record in records]
async def delete_case_law(case_law_id: UUID) -> bool:
    """Delete one precedent; its chunks and halachot go with it via cascade.

    Returns:
        ``True`` when exactly one row was deleted, ``False`` otherwise.
    """
    pool = await get_pool()
    status = await pool.execute(
        "DELETE FROM case_law WHERE id = $1", case_law_id,
    )
    # The driver reports the command tag, e.g. "DELETE 1".
    deleted_one = status == "DELETE 1"
    return deleted_one
async def store_precedent_chunks(
    case_law_id: UUID, chunks: list[dict],
) -> int:
    """Atomically replace all precedent chunks of a ``case_law`` row.

    The existing chunks are deleted and the new ones inserted inside a
    single transaction, so a failure part-way through leaves the previous
    chunk set untouched instead of a truncated one. Chunk dicts are also
    validated (required keys read) before anything is written.

    Args:
        case_law_id: Owning ``case_law`` row.
        chunks: One dict per chunk with keys ``chunk_index`` and ``content``
            (required), plus optional ``section_type`` (default ``'other'``),
            ``page_number`` and ``embedding`` (``list[float]`` or ``None``).

    Returns:
        Number of chunks stored.

    Raises:
        KeyError: If a chunk lacks ``chunk_index`` or ``content``.
    """
    # Build the argument rows first so malformed input fails before the
    # existing chunks are deleted.
    rows = [
        (
            case_law_id,
            c["chunk_index"],
            c["content"],
            c.get("section_type", "other"),
            c.get("page_number"),
            c.get("embedding"),
        )
        for c in chunks
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(
                "DELETE FROM precedent_chunks WHERE case_law_id = $1",
                case_law_id,
            )
            if rows:
                await conn.executemany(
                    """INSERT INTO precedent_chunks
                           (case_law_id, chunk_index, content, section_type,
                            page_number, embedding)
                       VALUES ($1, $2, $3, $4, $5, $6)""",
                    rows,
                )
    return len(rows)
async def list_precedent_chunks(
    case_law_id: UUID,
    section_types: tuple[str, ...] | None = None,
) -> list[dict]:
    """Return the chunks of one precedent, ordered by ``chunk_index``.

    Args:
        case_law_id: Owning ``case_law`` row.
        section_types: When given and non-empty, only chunks whose
            ``section_type`` is in this collection are returned.

    Returns:
        Dicts with id, chunk_index, content, section_type and page_number
        (the embedding is not selected).
    """
    pool = await get_pool()

    if not section_types:
        records = await pool.fetch(
            """SELECT id, chunk_index, content, section_type, page_number
               FROM precedent_chunks
               WHERE case_law_id = $1
               ORDER BY chunk_index""",
            case_law_id,
        )
    else:
        wanted = list(section_types)
        records = await pool.fetch(
            """SELECT id, chunk_index, content, section_type, page_number
               FROM precedent_chunks
               WHERE case_law_id = $1 AND section_type = ANY($2::text[])
               ORDER BY chunk_index""",
            case_law_id, wanted,
        )

    return [dict(record) for record in records]
async def delete_halachot(case_law_id: UUID) -> int:
    """Delete every halacha belonging to one precedent.

    Returns:
        The number of rows deleted, read from the command tag returned by
        the driver (e.g. ``"DELETE 5"``); ``0`` when the tag cannot be
        parsed.
    """
    pool = await get_pool()
    status = await pool.execute(
        "DELETE FROM halachot WHERE case_law_id = $1", case_law_id,
    )
    tokens = status.split()
    if not tokens:
        return 0
    try:
        deleted = int(tokens[-1])
    except ValueError:
        deleted = 0
    return deleted
async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
    """Bulk-insert extracted halachot, always as ``review_status='pending_review'``.

    All rows are written in one transaction with a single batched statement,
    so either the whole extracted set is stored or none of it is. Input
    dicts are validated (required keys read) before anything is written.

    ``halacha_index`` is the position of the item in ``halachot`` (0-based).

    Args:
        case_law_id: Owning ``case_law`` row.
        halachot: One dict per rule with keys ``rule_statement`` and
            ``supporting_quote`` (required), plus optional ``rule_type``
            (default ``'binding'``), ``reasoning_summary``,
            ``page_reference``, ``practice_areas``, ``subject_tags``,
            ``cites``, ``confidence``, ``quote_verified`` and ``embedding``.

    Returns:
        Number of halachot stored (``0`` for empty input, without touching
        the database).

    Raises:
        KeyError: If an item lacks ``rule_statement`` or ``supporting_quote``.
    """
    if not halachot:
        return 0

    rows = [
        (
            case_law_id,
            index,
            h["rule_statement"],
            h.get("rule_type", "binding"),
            h.get("reasoning_summary", ""),
            h["supporting_quote"],
            h.get("page_reference", ""),
            h.get("practice_areas", []),
            h.get("subject_tags", []),
            h.get("cites", []),
            h.get("confidence", 0.0),
            h.get("quote_verified", False),
            h.get("embedding"),
        )
        for index, h in enumerate(halachot)
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.executemany(
                """INSERT INTO halachot
                       (case_law_id, halacha_index, rule_statement, rule_type,
                        reasoning_summary, supporting_quote, page_reference,
                        practice_areas, subject_tags, cites, confidence,
                        quote_verified, embedding, review_status)
                   VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
                           $12, $13, 'pending_review')""",
                rows,
            )
    return len(rows)
async def list_halachot(
    case_law_id: UUID | None = None,
    review_status: str | None = None,
    practice_area: str | None = None,
    limit: int = 200,
    offset: int = 0,
) -> list[dict]:
    """List halachot together with basic data of their source precedent.

    Args:
        case_law_id: Restrict to one precedent when not ``None``.
        review_status: Restrict to this status when non-empty.
        practice_area: Restrict to halachot whose ``practice_areas`` array
            contains this value when non-empty.
        limit: Maximum number of rows.
        offset: Number of rows to skip.

    Returns:
        Dicts ordered by (case_law_id, halacha_index). ``decision_date``
        (the precedent's ``date``) is converted to an ISO string when set.
    """
    pool = await get_pool()

    where: list[str] = []
    params: list = []

    def bind(value) -> str:
        """Register a parameter and return its ``$n`` placeholder."""
        params.append(value)
        return f"${len(params)}"

    if case_law_id is not None:
        where.append(f"h.case_law_id = {bind(case_law_id)}")
    if review_status:
        where.append(f"h.review_status = {bind(review_status)}")
    if practice_area:
        where.append(f"{bind(practice_area)} = ANY(h.practice_areas)")

    where_sql = ""
    if where:
        where_sql = f"WHERE {' AND '.join(where)}"
    limit_ph = bind(limit)
    offset_ph = bind(offset)

    sql = f"""
        SELECT h.id, h.case_law_id, h.halacha_index, h.rule_statement,
               h.rule_type, h.reasoning_summary, h.supporting_quote,
               h.page_reference, h.practice_areas, h.subject_tags,
               h.cites, h.confidence, h.quote_verified, h.review_status,
               h.reviewer, h.reviewed_at, h.created_at, h.updated_at,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level
        FROM halachot h
        LEFT JOIN case_law cl ON cl.id = h.case_law_id
        {where_sql}
        ORDER BY h.case_law_id, h.halacha_index
        LIMIT {limit_ph} OFFSET {offset_ph}
    """
    records = await pool.fetch(sql, *params)

    items: list[dict] = []
    for record in records:
        item = dict(record)
        decided = item.get("decision_date")
        if decided is not None:
            item["decision_date"] = decided.isoformat()
        items.append(item)
    return items
async def update_halacha(
    halacha_id: UUID,
    review_status: str | None = None,
    reviewer: str = "",
    rule_statement: str | None = None,
    reasoning_summary: str | None = None,
    subject_tags: list[str] | None = None,
    practice_areas: list[str] | None = None,
) -> dict | None:
    """Apply a chair review action (approve / reject / edit) to one halacha.

    Only arguments that are not ``None`` are written. Setting
    ``review_status`` to ``approved``, ``rejected`` or ``published`` also
    stamps ``reviewed_at`` with the database clock and stores ``reviewer``.
    ``updated_at`` is refreshed on every effective update.

    Returns:
        The updated row as a dict, or ``None`` when nothing was requested
        or no row has the given id.
    """
    pool = await get_pool()

    assignments: list[str] = []
    params: list = [halacha_id]  # $1 is always the halacha id

    def bind(value) -> str:
        """Register a parameter and return its ``$n`` placeholder."""
        params.append(value)
        return f"${len(params)}"

    if review_status is not None:
        assignments.append(f"review_status = {bind(review_status)}")
        if review_status in ("approved", "rejected", "published"):
            assignments.append("reviewed_at = now()")
            assignments.append(f"reviewer = {bind(reviewer)}")

    # Plain editable columns, written in a fixed order.
    editable = (
        ("rule_statement", rule_statement),
        ("reasoning_summary", reasoning_summary),
        ("subject_tags", subject_tags),
        ("practice_areas", practice_areas),
    )
    for column, value in editable:
        if value is not None:
            assignments.append(f"{column} = {bind(value)}")

    if not assignments:
        return None

    assignments.append("updated_at = now()")
    sql = f"UPDATE halachot SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
    record = await pool.fetchrow(sql, *params)
    if not record:
        return None
    return dict(record)
async def search_precedent_library_semantic(
    query_embedding: list[float],
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> list[dict]:
    """Run a semantic search across the chair-uploaded precedent library.

    Two nearest-neighbour queries are issued, each capped at ``limit`` rows:
    one over halachot (skipped when ``include_halachot`` is false) and one
    over precedent chunks. Their hits are merged, sorted by score and cut
    to ``limit``.

    * Halachot: only ``approved`` / ``published`` rows are searched, and
      each hit's score is raised by 0.05 because it is a distilled rule.
      Hits carry ``type='halacha'``.
    * Chunks: only chunks of ``external_upload`` precedents are searched,
      regardless of halacha review state. Hits carry ``type='passage'``.

    Filters (empty string / ``None`` disables a filter): ``practice_area``,
    ``court`` (case-insensitive substring), ``precedent_level``,
    ``appeal_subtype`` and ``is_binding`` apply to both queries;
    ``subject_tag`` applies to the halachot query only.

    Returns:
        Up to ``limit`` hit dicts, highest score first. ``score`` is
        ``1 - cosine distance`` (plus the halacha boost); ``decision_date``
        is an ISO string when set.
    """
    pool = await get_pool()

    h_where = ["h.review_status IN ('approved', 'published')"]
    c_where = ["cl.source_kind = 'external_upload'"]
    # In both statements $1 is the query vector and $2 the row limit.
    h_args: list = [query_embedding, limit]
    c_args: list = [query_embedding, limit]

    def bind(args: list, value) -> str:
        """Append ``value`` to ``args`` and return its ``$n`` placeholder."""
        args.append(value)
        return f"${len(args)}"

    if practice_area:
        # Halachot carry an array of areas; case_law carries a single one.
        h_where.append(f"{bind(h_args, practice_area)} = ANY(h.practice_areas)")
        c_where.append(f"cl.practice_area = {bind(c_args, practice_area)}")

    # Filters on the joined case_law row are identical for both queries.
    shared: list[tuple[str, object]] = []
    if court:
        shared.append(("cl.court ILIKE", f"%{court}%"))
    if precedent_level:
        shared.append(("cl.precedent_level =", precedent_level))
    if appeal_subtype:
        shared.append(("cl.appeal_subtype =", appeal_subtype))
    if is_binding is not None:
        shared.append(("cl.is_binding =", is_binding))
    for lhs, value in shared:
        h_where.append(f"{lhs} {bind(h_args, value)}")
        c_where.append(f"{lhs} {bind(c_args, value)}")

    if subject_tag:
        h_where.append(f"{bind(h_args, subject_tag)} = ANY(h.subject_tags)")

    halacha_sql = f"""
        SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
               h.reasoning_summary, h.supporting_quote, h.page_reference,
               h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level,
               1 - (h.embedding <=> $1) AS score
        FROM halachot h
        JOIN case_law cl ON cl.id = h.case_law_id
        WHERE {' AND '.join(h_where)}
          AND h.embedding IS NOT NULL
        ORDER BY h.embedding <=> $1
        LIMIT $2
    """
    chunk_sql = f"""
        SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
               pc.section_type, pc.page_number,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.practice_area,
               1 - (pc.embedding <=> $1) AS score
        FROM precedent_chunks pc
        JOIN case_law cl ON cl.id = pc.case_law_id
        WHERE {' AND '.join(c_where)}
          AND pc.embedding IS NOT NULL
        ORDER BY pc.embedding <=> $1
        LIMIT $2
    """

    hits: list[dict] = []

    if include_halachot:
        for record in await pool.fetch(halacha_sql, *h_args):
            hit = dict(record)
            if hit.get("decision_date") is not None:
                hit["decision_date"] = hit["decision_date"].isoformat()
            hit["score"] = float(hit["score"]) + 0.05  # rule-level boost
            hit["type"] = "halacha"
            hits.append(hit)

    for record in await pool.fetch(chunk_sql, *c_args):
        hit = dict(record)
        if hit.get("decision_date") is not None:
            hit["decision_date"] = hit["decision_date"].isoformat()
        hit["score"] = float(hit["score"])
        hit["type"] = "passage"
        hits.append(hit)

    # Stable sort: on equal scores halachot stay ahead of passages.
    hits.sort(key=lambda hit: hit["score"], reverse=True)
    return hits[:limit]
async def precedent_library_stats() -> dict:
    """Collect the aggregate numbers shown on the /precedents stats tab.

    Returns:
        A dict with:

        * ``precedents_total`` — number of ``external_upload`` precedents.
        * ``by_practice_area`` / ``by_precedent_level`` — per-value counts
          of those precedents, largest first.
        * ``halachot_total`` — all halachot.
        * ``halachot_pending`` — halachot in ``pending_review``.
        * ``halachot_approved`` — halachot that are approved or published.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        precedent_count = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'external_upload'"
        )
        practice_rows = await conn.fetch(
            """SELECT practice_area, COUNT(*) AS n
               FROM case_law
               WHERE source_kind = 'external_upload'
               GROUP BY practice_area
               ORDER BY n DESC"""
        )
        level_rows = await conn.fetch(
            """SELECT precedent_level, COUNT(*) AS n
               FROM case_law
               WHERE source_kind = 'external_upload'
               GROUP BY precedent_level
               ORDER BY n DESC"""
        )
        all_halachot = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot"
        )
        pending_halachot = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review'"
        )
        approved_halachot = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot WHERE review_status IN ('approved', 'published')"
        )

    practice_breakdown = []
    for rec in practice_rows:
        practice_breakdown.append(
            {"practice_area": rec["practice_area"], "count": int(rec["n"])}
        )

    level_breakdown = []
    for rec in level_rows:
        level_breakdown.append(
            {"precedent_level": rec["precedent_level"], "count": int(rec["n"])}
        )

    stats = {
        "precedents_total": int(precedent_count or 0),
        "by_practice_area": practice_breakdown,
        "by_precedent_level": level_breakdown,
        "halachot_total": int(all_halachot or 0),
        "halachot_pending": int(pending_halachot or 0),
        "halachot_approved": int(approved_halachot or 0),
    }
    return stats