feat: external precedent library with auto halacha extraction

Adds a third corpus of legal authority distinct from style_corpus (Daphna's prior decisions for voice) and case_precedents (chair-attached quotes per case). The new corpus holds chair-uploaded court rulings and other appeals committee decisions, with binding rules (הלכות) extracted automatically and queued for chair approval. Pipeline (web/app.py + services/precedent_library.py): file → extract → chunk → Voyage embed → halacha_extractor → store + publish progress over the existing Redis SSE channel. Schema V7 (services/db.py): extends case_law with source_kind + extraction status fields under a CHECK constraint pinning practice_area to the three appeals committee domains (rishuy_uvniya, betterment_levy, compensation_197). New precedent_chunks (vector(1024)) and halachot tables (vector(1024) over rule_statement, IVFFlat indexes, gin on practice_areas/subject_tags). Halachot start as pending_review; only approved/published rows are visible to search_precedent_library. Agents: legal-writer, legal-researcher, legal-analyst, legal-ceo, legal-qa get search_precedent_library. legal-writer prompt explains the three-corpus distinction and CREAC use; legal-qa now verifies that every cited halacha resolves to an approved row in the corpus. UI: /precedents page with four tabs — library / semantic search / pending review (J/K nav, A/R/E shortcuts, badge count) / stats. Reuses the existing upload-sheet progress + SSE pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:38:18 +00:00
parent a6edb75bbf
commit 7ee90dce31
23 changed files with 3853 additions and 67 deletions
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -518,6 +518,91 @@ CREATE INDEX IF NOT EXISTS idx_cases_archived ON cases(archived_at) WHERE archiv
 """


+# ── V7: External Precedent Library + halacha extraction ──────────
+# Chair-uploaded external court rulings and other appeals committee decisions
+# become an authoritative law corpus. Distinct from style_corpus (Daphna's
+# style) and case_precedents (chair-attached quotes scoped to a single case).
+#
+# The script is written to be re-runnable: every ALTER / CREATE uses
+# IF NOT EXISTS, and the CHECK constraint is added inside a DO block that
+# ignores duplicate_object on later runs.
+#
+# NOTE(review): both ivfflat indexes are created together with their (still
+# empty) tables. pgvector recommends building ivfflat indexes after the table
+# holds data, because the list centroids are computed at build time — consider
+# a REINDEX after the first bulk ingest. TODO confirm recall on real data.
+
+SCHEMA_V7_SQL = """
+-- case_law extensions: distinguish chair-uploaded full rulings from
+-- auto-extracted citation stubs, and track ingestion progress.
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_kind TEXT DEFAULT 'cited_only';
+    -- 'external_upload' (chair uploaded full ruling) | 'cited_only' (stub from
+    -- references_extractor) | 'nevo_seed' (future: auto-fetched from Nevo).
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS document_id UUID REFERENCES documents(id) ON DELETE SET NULL;
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS extraction_status TEXT DEFAULT 'pending';
+    -- 'pending' | 'processing' | 'completed' | 'failed'
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_status TEXT DEFAULT 'pending';
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT '';
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS headnote TEXT DEFAULT '';
+    -- chair-editable abstract shown in search results.
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_type TEXT DEFAULT '';
+    -- 'court_ruling' | 'appeals_committee'
+
+-- practice_area is closed to the three appeals committee domains.
+DO $$ BEGIN
+    ALTER TABLE case_law ADD CONSTRAINT case_law_practice_area_check
+        CHECK (practice_area IN ('', 'rishuy_uvniya', 'betterment_levy', 'compensation_197'));
+EXCEPTION WHEN duplicate_object THEN NULL; END $$;
+
+CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
+CREATE INDEX IF NOT EXISTS idx_case_law_practice ON case_law(practice_area, appeal_subtype);
+
+-- precedent_chunks: full-text chunks of an uploaded ruling, with embeddings.
+-- Analog of document_chunks for case_law rows where source_kind='external_upload'.
+CREATE TABLE IF NOT EXISTS precedent_chunks (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
+    chunk_index INTEGER NOT NULL,
+    content TEXT NOT NULL,
+    section_type TEXT DEFAULT 'other',
+        -- intro | facts | legal_analysis | ruling | conclusion | other
+    page_number INTEGER,
+    embedding vector(1024),
+    created_at TIMESTAMPTZ DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_precedent_chunks_case_law ON precedent_chunks(case_law_id);
+CREATE INDEX IF NOT EXISTS idx_precedent_chunks_section ON precedent_chunks(case_law_id, section_type);
+CREATE INDEX IF NOT EXISTS idx_precedent_chunks_vec
+    ON precedent_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
+
+-- halachot: extracted binding rules. One halacha = one rule + verbatim quote.
+-- Embedded separately for rule-precision semantic match (chunks centroid is
+-- dominated by surrounding context). All halachot start as pending_review;
+-- only approved/published rows are visible to search_precedent_library.
+CREATE TABLE IF NOT EXISTS halachot (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
+    halacha_index INTEGER NOT NULL,
+    rule_statement TEXT NOT NULL,
+    rule_type TEXT DEFAULT 'binding',
+        -- binding | interpretive | procedural | obiter
+    reasoning_summary TEXT DEFAULT '',
+    supporting_quote TEXT NOT NULL,
+    page_reference TEXT DEFAULT '',
+    practice_areas TEXT[] DEFAULT '{}',
+    subject_tags TEXT[] DEFAULT '{}',
+    cites TEXT[] DEFAULT '{}',
+    confidence NUMERIC(3,2) DEFAULT 0.0,
+    quote_verified BOOLEAN DEFAULT FALSE,
+    review_status TEXT DEFAULT 'pending_review',
+        -- pending_review | approved | rejected | published
+    reviewer TEXT DEFAULT '',
+    reviewed_at TIMESTAMPTZ,
+    embedding vector(1024),
+    created_at TIMESTAMPTZ DEFAULT now(),
+    updated_at TIMESTAMPTZ DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_halachot_case_law ON halachot(case_law_id);
+CREATE INDEX IF NOT EXISTS idx_halachot_status ON halachot(review_status);
+CREATE INDEX IF NOT EXISTS idx_halachot_practice ON halachot USING gin(practice_areas);
+CREATE INDEX IF NOT EXISTS idx_halachot_tags ON halachot USING gin(subject_tags);
+CREATE INDEX IF NOT EXISTS idx_halachot_vec
+    ON halachot USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
+"""
+
+
 async def init_schema() -> None:
    pool = await get_pool()
    async with pool.acquire() as conn:
@@ -528,7 +613,8 @@ async def init_schema() -> None:
        await conn.execute(SCHEMA_V4_SQL)
        await conn.execute(SCHEMA_V5_SQL)
        await conn.execute(SCHEMA_V6_SQL)
-    logger.info("Database schema initialized (v1-v6)")
+        await conn.execute(SCHEMA_V7_SQL)
+    logger.info("Database schema initialized (v1-v7)")


 # ── Case CRUD ───────────────────────────────────────────────────────
@@ -1518,3 +1604,590 @@ async def detect_appraiser_conflicts(case_id: UUID) -> list[dict]:
            "entries": entries,
        })
    return conflicts
+
+
+# ── V7: External precedent library + halachot ─────────────────────
+
+
+def _row_to_case_law(row: asyncpg.Record) -> dict:
+    """Convert a case_law record into a plain dict.
+
+    A ``subject_tags`` value that arrives as a JSON string is decoded
+    (falling back to an empty list when it cannot be parsed), and a non-null
+    ``date`` is rendered as an ISO-8601 string. Every other column is passed
+    through unchanged.
+    """
+    record = dict(row)
+
+    raw_tags = record.get("subject_tags")
+    if isinstance(raw_tags, str):
+        try:
+            decoded_tags = json.loads(raw_tags)
+        except (TypeError, ValueError):
+            decoded_tags = []
+        record["subject_tags"] = decoded_tags
+
+    decided_on = record.get("date")
+    if decided_on is not None:
+        record["date"] = decided_on.isoformat()
+
+    return record
+
+
+async def get_case_law(case_law_id: UUID) -> dict | None:
+    """Fetch one case_law row by primary key; ``None`` when it is absent."""
+    pool = await get_pool()
+    record = await pool.fetchrow(
+        "SELECT * FROM case_law WHERE id = $1", case_law_id,
+    )
+    if not record:
+        return None
+    return _row_to_case_law(record)
+
+
+async def get_case_law_by_citation(case_number: str) -> dict | None:
+    """Fetch a case_law row by its exact ``case_number``; ``None`` if absent.
+
+    Only the first matching row is returned.
+    """
+    pool = await get_pool()
+    record = await pool.fetchrow(
+        "SELECT * FROM case_law WHERE case_number = $1", case_number,
+    )
+    if not record:
+        return None
+    return _row_to_case_law(record)
+
+
+async def create_external_case_law(
+    case_number: str,
+    case_name: str,
+    full_text: str,
+    court: str = "",
+    decision_date: date | None = None,
+    practice_area: str = "",
+    appeal_subtype: str = "",
+    subject_tags: list[str] | None = None,
+    summary: str = "",
+    headnote: str = "",
+    key_quote: str = "",
+    source_url: str = "",
+    source_type: str = "",
+    precedent_level: str = "",
+    is_binding: bool = True,
+    document_id: UUID | None = None,
+) -> dict:
+    """Insert or refresh a chair-uploaded external precedent in case_law.
+
+    If a row with this ``case_number`` already exists — whatever its current
+    ``source_kind`` (an auto-discovered 'cited_only' stub or an earlier
+    upload) — that row is updated in place and marked
+    source_kind='external_upload'. Otherwise a new row is inserted.
+
+    On the update path:
+      * ``court``, ``summary``, ``key_quote`` and ``source_url`` keep their
+        stored value when the argument is an empty string;
+      * ``date`` and ``document_id`` keep their stored value when the
+        argument is ``None``;
+      * every other supplied field overwrites the stored value, even when
+        the argument is empty.
+
+    Both paths set extraction_status='processing' and
+    halacha_extraction_status='pending'.
+
+    Returns:
+        The resulting row, normalized by ``_row_to_case_law``.
+
+    NOTE(review): the lookup and the write are separate statements, so two
+    concurrent uploads of the same ``case_number`` could both take the insert
+    path unless case_law has a unique constraint on case_number — TODO confirm.
+    """
+    pool = await get_pool()
+    # subject_tags is sent as JSON text.
+    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
+    async with pool.acquire() as conn:
+        # Only the id is needed: the update applies to any matching row.
+        existing_id = await conn.fetchval(
+            "SELECT id FROM case_law WHERE case_number = $1",
+            case_number,
+        )
+        if existing_id is not None:
+            row = await conn.fetchrow(
+                """
+                UPDATE case_law SET
+                    case_name = $2,
+                    court = COALESCE(NULLIF($3, ''), court),
+                    date = COALESCE($4, date),
+                    practice_area = $5,
+                    appeal_subtype = $6,
+                    subject_tags = $7,
+                    summary = COALESCE(NULLIF($8, ''), summary),
+                    headnote = $9,
+                    key_quote = COALESCE(NULLIF($10, ''), key_quote),
+                    full_text = $11,
+                    source_url = COALESCE(NULLIF($12, ''), source_url),
+                    source_type = $13,
+                    precedent_level = $14,
+                    is_binding = $15,
+                    document_id = COALESCE($16, document_id),
+                    source_kind = 'external_upload',
+                    extraction_status = 'processing',
+                    halacha_extraction_status = 'pending'
+                WHERE id = $1
+                RETURNING *
+                """,
+                existing_id, case_name, court, decision_date,
+                practice_area, appeal_subtype, tags_json, summary, headnote,
+                key_quote, full_text, source_url, source_type,
+                precedent_level, is_binding, document_id,
+            )
+        else:
+            row = await conn.fetchrow(
+                """
+                INSERT INTO case_law (
+                    case_number, case_name, court, date, subject_tags,
+                    summary, key_quote, full_text, source_url,
+                    source_kind, document_id, extraction_status,
+                    halacha_extraction_status, practice_area, appeal_subtype,
+                    headnote, source_type, precedent_level, is_binding
+                ) VALUES (
+                    $1, $2, $3, $4, $5, $6, $7, $8, $9,
+                    'external_upload', $10, 'processing', 'pending',
+                    $11, $12, $13, $14, $15, $16
+                )
+                RETURNING *
+                """,
+                case_number, case_name, court, decision_date, tags_json,
+                summary, key_quote, full_text, source_url,
+                document_id, practice_area, appeal_subtype, headnote,
+                source_type, precedent_level, is_binding,
+            )
+    return _row_to_case_law(row)
+
+
+async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
+    """Patch metadata columns on one case_law row.
+
+    Only these keyword arguments are honoured; anything else is silently
+    dropped: case_name, court, date, practice_area, appeal_subtype,
+    subject_tags, summary, headnote, key_quote, source_url, source_type,
+    precedent_level, is_binding.
+
+    Returns the updated row, the current row when no honoured field was
+    supplied, or ``None`` when the id does not exist.
+    """
+    editable = frozenset((
+        "case_name", "court", "date", "practice_area", "appeal_subtype",
+        "subject_tags", "summary", "headnote", "key_quote", "source_url",
+        "source_type", "precedent_level", "is_binding",
+    ))
+    accepted = {
+        name: value for name, value in fields.items() if name in editable
+    }
+    if not accepted:
+        return await get_case_law(case_law_id)
+
+    pool = await get_pool()
+    assignments: list[str] = []
+    args: list = [case_law_id]  # $1 is always the row id
+    for column, value in accepted.items():
+        if column == "subject_tags":
+            # Sent as JSON text; a falsy value is stored as "[]".
+            value = json.dumps(value or [], ensure_ascii=False)
+        args.append(value)
+        # Column names come from the whitelist above, never from the caller.
+        assignments.append(f"{column} = ${len(args)}")
+
+    query = f"UPDATE case_law SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
+    record = await pool.fetchrow(query, *args)
+    if not record:
+        return None
+    return _row_to_case_law(record)
+
+
+async def set_case_law_extraction_status(case_law_id: UUID, status: str) -> None:
+    """Overwrite ``extraction_status`` on one case_law row.
+
+    The value is written as given; no validation is performed here.
+    """
+    query = "UPDATE case_law SET extraction_status = $2 WHERE id = $1"
+    pool = await get_pool()
+    await pool.execute(query, case_law_id, status)
+
+
+async def set_case_law_halacha_status(case_law_id: UUID, status: str) -> None:
+    """Overwrite ``halacha_extraction_status`` on one case_law row.
+
+    The value is written as given; no validation is performed here.
+    """
+    query = "UPDATE case_law SET halacha_extraction_status = $2 WHERE id = $1"
+    pool = await get_pool()
+    await pool.execute(query, case_law_id, status)
+
+
+async def list_external_case_law(
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    source_type: str = "",
+    search: str = "",
+    limit: int = 100,
+    offset: int = 0,
+) -> list[dict]:
+    """Page through chair-uploaded precedents, newest first.
+
+    Empty filter arguments are ignored. ``practice_area``,
+    ``precedent_level`` and ``source_type`` match exactly; ``court`` is a
+    case-insensitive substring match; ``search`` is a case-insensitive
+    substring match against case_number, case_name, summary and headnote.
+
+    Each returned dict also carries ``halachot_count`` (all halachot of the
+    precedent) and ``approved_count`` (approved or published ones).
+    """
+    pool = await get_pool()
+    where: list[str] = ["source_kind = 'external_upload'"]
+    args: list = []
+
+    def bind(value) -> str:
+        """Register a query argument and return its ``$n`` placeholder."""
+        args.append(value)
+        return f"${len(args)}"
+
+    if practice_area:
+        where.append(f"practice_area = {bind(practice_area)}")
+    if court:
+        court_pattern = f"%{court}%"
+        where.append(f"court ILIKE {bind(court_pattern)}")
+    if precedent_level:
+        where.append(f"precedent_level = {bind(precedent_level)}")
+    if source_type:
+        where.append(f"source_type = {bind(source_type)}")
+    if search:
+        # One bound pattern, referenced by all four columns.
+        needle = bind(f"%{search}%")
+        where.append(
+            f"(case_number ILIKE {needle} OR case_name ILIKE {needle} "
+            f"OR summary ILIKE {needle} OR headnote ILIKE {needle})"
+        )
+
+    where_sql = " AND ".join(where)
+    limit_ph = bind(limit)
+    offset_ph = bind(offset)
+    sql = f"""
+        SELECT id, case_number, case_name, court, date, practice_area,
+               appeal_subtype, source_type, precedent_level, is_binding,
+               summary, headnote, subject_tags, source_kind,
+               extraction_status, halacha_extraction_status,
+               created_at,
+               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id) AS halachot_count,
+               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id
+                  AND h.review_status IN ('approved', 'published')) AS approved_count
+        FROM case_law
+        WHERE {where_sql}
+        ORDER BY created_at DESC
+        LIMIT {limit_ph} OFFSET {offset_ph}
+    """
+    records = await pool.fetch(sql, *args)
+    return [_row_to_case_law(record) for record in records]
+
+
+async def delete_case_law(case_law_id: UUID) -> bool:
+    """Delete one precedent; its chunks and halachot go with it.
+
+    Returns ``True`` only when exactly one case_law row was removed.
+    """
+    pool = await get_pool()
+    command_tag = await pool.execute(
+        "DELETE FROM case_law WHERE id = $1", case_law_id,
+    )
+    # The command tag reports the number of deleted rows, e.g. "DELETE 1".
+    return command_tag == "DELETE 1"
+
+
+async def store_precedent_chunks(
+    case_law_id: UUID, chunks: list[dict],
+) -> int:
+    """Atomically replace the precedent chunks of a case_law row.
+
+    Each chunk dict has: chunk_index, content, section_type (defaults to
+    'other'), page_number (optional), embedding (list[float] or None).
+
+    The delete of the old chunks and the insert of the new ones run in one
+    transaction, so a failure part-way through leaves the previously stored
+    chunks intact instead of an empty or half-written set.
+
+    Returns:
+        The number of chunks stored.
+
+    Raises:
+        KeyError: if a chunk lacks ``chunk_index`` or ``content``. This is
+            raised before anything is written.
+    """
+    # Build all rows up front so malformed input fails before any write.
+    rows = [
+        (
+            case_law_id,
+            chunk["chunk_index"],
+            chunk["content"],
+            chunk.get("section_type", "other"),
+            chunk.get("page_number"),
+            chunk.get("embedding"),
+        )
+        for chunk in chunks
+    ]
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        async with conn.transaction():
+            await conn.execute(
+                "DELETE FROM precedent_chunks WHERE case_law_id = $1",
+                case_law_id,
+            )
+            if rows:
+                await conn.executemany(
+                    """INSERT INTO precedent_chunks
+                       (case_law_id, chunk_index, content, section_type,
+                        page_number, embedding)
+                       VALUES ($1, $2, $3, $4, $5, $6)""",
+                    rows,
+                )
+    return len(rows)
+
+
+async def list_precedent_chunks(
+    case_law_id: UUID,
+    section_types: tuple[str, ...] | None = None,
+) -> list[dict]:
+    """Return the chunks of one precedent ordered by ``chunk_index``.
+
+    When ``section_types`` is non-empty only chunks whose ``section_type``
+    is one of the given values are returned. Embeddings are not selected.
+    """
+    pool = await get_pool()
+    if not section_types:
+        records = await pool.fetch(
+            """SELECT id, chunk_index, content, section_type, page_number
+               FROM precedent_chunks
+               WHERE case_law_id = $1
+               ORDER BY chunk_index""",
+            case_law_id,
+        )
+        return [dict(record) for record in records]
+
+    records = await pool.fetch(
+        """SELECT id, chunk_index, content, section_type, page_number
+               FROM precedent_chunks
+               WHERE case_law_id = $1 AND section_type = ANY($2::text[])
+               ORDER BY chunk_index""",
+        case_law_id, list(section_types),
+    )
+    return [dict(record) for record in records]
+
+
+async def delete_halachot(case_law_id: UUID) -> int:
+    """Delete every halacha of a precedent and return how many were removed.
+
+    Returns 0 when the count cannot be read from the command tag.
+    """
+    pool = await get_pool()
+    command_tag = await pool.execute(
+        "DELETE FROM halachot WHERE case_law_id = $1", case_law_id,
+    )
+    # The tag looks like "DELETE 5"; its last token is the row count.
+    tokens = command_tag.split()
+    if not tokens:
+        return 0
+    try:
+        return int(tokens[-1])
+    except ValueError:
+        return 0
+
+
+async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
+    """Bulk-insert extracted halachot. Always with review_status='pending_review'.
+
+    ``halacha_index`` is the position of each item in ``halachot``. Existing
+    halachot of the precedent are not touched.
+
+    The whole batch is written in one transaction, so a failure part-way
+    through leaves no partial set of rules behind.
+
+    Returns:
+        The number of halachot inserted (0 for an empty list).
+
+    Raises:
+        KeyError: if an item lacks ``rule_statement`` or ``supporting_quote``.
+            This is raised before anything is written.
+    """
+    if not halachot:
+        return 0
+    # Build all rows up front so malformed input fails before any write.
+    rows = [
+        (
+            case_law_id,
+            position,
+            item["rule_statement"],
+            item.get("rule_type", "binding"),
+            item.get("reasoning_summary", ""),
+            item["supporting_quote"],
+            item.get("page_reference", ""),
+            item.get("practice_areas", []),
+            item.get("subject_tags", []),
+            item.get("cites", []),
+            item.get("confidence", 0.0),
+            item.get("quote_verified", False),
+            item.get("embedding"),
+        )
+        for position, item in enumerate(halachot)
+    ]
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        async with conn.transaction():
+            await conn.executemany(
+                """INSERT INTO halachot
+                   (case_law_id, halacha_index, rule_statement, rule_type,
+                    reasoning_summary, supporting_quote, page_reference,
+                    practice_areas, subject_tags, cites, confidence,
+                    quote_verified, embedding, review_status)
+                   VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
+                           $12, $13, 'pending_review')""",
+                rows,
+            )
+    return len(rows)
+
+
+async def list_halachot(
+    case_law_id: UUID | None = None,
+    review_status: str | None = None,
+    practice_area: str | None = None,
+    limit: int = 200,
+    offset: int = 0,
+) -> list[dict]:
+    """List halachot joined with basic metadata of their precedent.
+
+    All filters are optional and combined with AND: ``case_law_id`` and
+    ``review_status`` match exactly, ``practice_area`` must be a member of
+    the halacha's ``practice_areas`` array. Rows are ordered by precedent and
+    then by ``halacha_index``. ``decision_date`` is returned as an ISO string
+    when present. Embeddings are not selected.
+    """
+    pool = await get_pool()
+    where: list[str] = []
+    args: list = []
+
+    def bind(value) -> str:
+        """Register a query argument and return its ``$n`` placeholder."""
+        args.append(value)
+        return f"${len(args)}"
+
+    if case_law_id is not None:
+        where.append(f"h.case_law_id = {bind(case_law_id)}")
+    if review_status:
+        where.append(f"h.review_status = {bind(review_status)}")
+    if practice_area:
+        where.append(f"{bind(practice_area)} = ANY(h.practice_areas)")
+
+    where_sql = f"WHERE {' AND '.join(where)}" if where else ""
+    limit_ph = bind(limit)
+    offset_ph = bind(offset)
+    sql = f"""
+        SELECT h.id, h.case_law_id, h.halacha_index, h.rule_statement,
+               h.rule_type, h.reasoning_summary, h.supporting_quote,
+               h.page_reference, h.practice_areas, h.subject_tags,
+               h.cites, h.confidence, h.quote_verified, h.review_status,
+               h.reviewer, h.reviewed_at, h.created_at, h.updated_at,
+               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
+               cl.precedent_level
+        FROM halachot h
+        LEFT JOIN case_law cl ON cl.id = h.case_law_id
+        {where_sql}
+        ORDER BY h.case_law_id, h.halacha_index
+        LIMIT {limit_ph} OFFSET {offset_ph}
+    """
+    items: list[dict] = []
+    for record in await pool.fetch(sql, *args):
+        item = dict(record)
+        decided_on = item.get("decision_date")
+        if decided_on is not None:
+            item["decision_date"] = decided_on.isoformat()
+        items.append(item)
+    return items
+
+
+async def update_halacha(
+    halacha_id: UUID,
+    review_status: str | None = None,
+    reviewer: str = "",
+    rule_statement: str | None = None,
+    reasoning_summary: str | None = None,
+    subject_tags: list[str] | None = None,
+    practice_areas: list[str] | None = None,
+) -> dict | None:
+    """Apply a chair review action (approve / reject / edit) to one halacha.
+
+    Only arguments that are not ``None`` are written. When ``review_status``
+    becomes 'approved', 'rejected' or 'published', ``reviewed_at`` is set to
+    the current time and ``reviewer`` is recorded. ``updated_at`` is bumped
+    on every write.
+
+    Returns the updated row as a dict, or ``None`` when nothing was supplied
+    to change or the id does not exist.
+
+    NOTE(review): the stored embedding is not refreshed here, so after an
+    edit of ``rule_statement`` it presumably still reflects the old wording —
+    confirm whether the caller re-embeds.
+    """
+    pool = await get_pool()
+    use_now = object()  # marker: assign the SQL expression now(), not a parameter
+
+    assignments: list[tuple[str, object]] = []
+    if review_status is not None:
+        assignments.append(("review_status", review_status))
+        if review_status in ("approved", "rejected", "published"):
+            assignments.append(("reviewed_at", use_now))
+            assignments.append(("reviewer", reviewer))
+    optional_edits = (
+        ("rule_statement", rule_statement),
+        ("reasoning_summary", reasoning_summary),
+        ("subject_tags", subject_tags),
+        ("practice_areas", practice_areas),
+    )
+    assignments.extend(
+        (column, value) for column, value in optional_edits if value is not None
+    )
+    if not assignments:
+        return None
+    assignments.append(("updated_at", use_now))
+
+    args: list = [halacha_id]  # $1 is always the halacha id
+    set_clauses: list[str] = []
+    for column, value in assignments:
+        if value is use_now:
+            set_clauses.append(f"{column} = now()")
+        else:
+            args.append(value)
+            set_clauses.append(f"{column} = ${len(args)}")
+
+    query = f"UPDATE halachot SET {', '.join(set_clauses)} WHERE id = $1 RETURNING *"
+    record = await pool.fetchrow(query, *args)
+    if not record:
+        return None
+    return dict(record)
+
+
+async def search_precedent_library_semantic(
+    query_embedding: list[float],
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    appeal_subtype: str = "",
+    is_binding: bool | None = None,
+    subject_tag: str = "",
+    limit: int = 10,
+    include_halachot: bool = True,
+) -> list[dict]:
+    """Run a semantic search over the chair-uploaded precedent library.
+
+    Two nearest-neighbour queries (cosine distance) are issued, one over
+    halachot and one over precedent chunks, each capped at ``limit``. The
+    hits are merged, sorted by score and cut to ``limit`` again.
+
+    * Halacha hits (``type == "halacha"``) come only from rows whose
+      review_status is 'approved' or 'published', and receive a +0.05 score
+      boost because they are pre-distilled rules.
+    * Chunk hits (``type == "passage"``) come from precedents with
+      source_kind='external_upload', regardless of halacha review status.
+
+    Empty filter arguments are ignored. ``practice_area`` is matched against
+    the halacha's ``practice_areas`` array for halachot and against the
+    precedent's ``practice_area`` for chunks.
+
+    NOTE(review): ``subject_tag`` restricts halacha hits only; passages are
+    returned regardless of tag — confirm this is intended.
+    """
+    pool = await get_pool()
+
+    # In both statements $1 is the query embedding and $2 the row limit;
+    # filter values are bound from $3 onwards, independently per statement.
+    halacha_where = ["h.review_status IN ('approved', 'published')"]
+    halacha_args: list = [query_embedding, limit]
+    chunk_where = ["cl.source_kind = 'external_upload'"]
+    chunk_args: list = [query_embedding, limit]
+
+    def add_halacha_filter(template: str, value) -> None:
+        halacha_args.append(value)
+        halacha_where.append(template.format(f"${len(halacha_args)}"))
+
+    def add_chunk_filter(template: str, value) -> None:
+        chunk_args.append(value)
+        chunk_where.append(template.format(f"${len(chunk_args)}"))
+
+    if practice_area:
+        add_halacha_filter("{} = ANY(h.practice_areas)", practice_area)
+        add_chunk_filter("cl.practice_area = {}", practice_area)
+
+    # Filters on the parent case_law row are identical for both statements.
+    shared_filters = (
+        (bool(court), "cl.court ILIKE {}", f"%{court}%"),
+        (bool(precedent_level), "cl.precedent_level = {}", precedent_level),
+        (bool(appeal_subtype), "cl.appeal_subtype = {}", appeal_subtype),
+        (is_binding is not None, "cl.is_binding = {}", is_binding),
+    )
+    for active, template, value in shared_filters:
+        if active:
+            add_halacha_filter(template, value)
+            add_chunk_filter(template, value)
+
+    if subject_tag:
+        add_halacha_filter("{} = ANY(h.subject_tags)", subject_tag)
+
+    halacha_sql = f"""
+        SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
+               h.reasoning_summary, h.supporting_quote, h.page_reference,
+               h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
+               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
+               cl.precedent_level,
+               1 - (h.embedding <=> $1) AS score
+        FROM halachot h
+        JOIN case_law cl ON cl.id = h.case_law_id
+        WHERE {' AND '.join(halacha_where)}
+          AND h.embedding IS NOT NULL
+        ORDER BY h.embedding <=> $1
+        LIMIT $2
+    """
+
+    chunk_sql = f"""
+        SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
+               pc.section_type, pc.page_number,
+               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
+               cl.precedent_level, cl.practice_area,
+               1 - (pc.embedding <=> $1) AS score
+        FROM precedent_chunks pc
+        JOIN case_law cl ON cl.id = pc.case_law_id
+        WHERE {' AND '.join(chunk_where)}
+          AND pc.embedding IS NOT NULL
+        ORDER BY pc.embedding <=> $1
+        LIMIT $2
+    """
+
+    def as_hit(record, kind: str) -> dict:
+        """Turn a result row into a plain dict tagged with its hit type."""
+        hit = dict(record)
+        decided_on = hit.get("decision_date")
+        if decided_on is not None:
+            hit["decision_date"] = decided_on.isoformat()
+        hit["score"] = float(hit["score"])
+        hit["type"] = kind
+        return hit
+
+    merged: list[dict] = []
+    if include_halachot:
+        halacha_rows = await pool.fetch(halacha_sql, *halacha_args)
+        for record in halacha_rows:
+            hit = as_hit(record, "halacha")
+            hit["score"] += 0.05  # rule-level boost
+            merged.append(hit)
+
+    chunk_rows = await pool.fetch(chunk_sql, *chunk_args)
+    merged.extend(as_hit(record, "passage") for record in chunk_rows)
+
+    merged.sort(key=lambda hit: hit["score"], reverse=True)
+    return merged[:limit]
+
+
+async def precedent_library_stats() -> dict:
+    """Collect the counters shown on the /precedents stats tab.
+
+    Precedent counts cover only rows with source_kind='external_upload';
+    halacha counts cover the whole halachot table. ``halachot_approved``
+    includes both 'approved' and 'published' rows.
+    """
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        precedents_total = await conn.fetchval(
+            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'external_upload'"
+        )
+        practice_rows = await conn.fetch(
+            """SELECT practice_area, COUNT(*) AS n
+               FROM case_law
+               WHERE source_kind = 'external_upload'
+               GROUP BY practice_area
+               ORDER BY n DESC"""
+        )
+        level_rows = await conn.fetch(
+            """SELECT precedent_level, COUNT(*) AS n
+               FROM case_law
+               WHERE source_kind = 'external_upload'
+               GROUP BY precedent_level
+               ORDER BY n DESC"""
+        )
+        all_halachot = await conn.fetchval(
+            "SELECT COUNT(*) FROM halachot"
+        )
+        pending_halachot = await conn.fetchval(
+            "SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review'"
+        )
+        approved_halachot = await conn.fetchval(
+            "SELECT COUNT(*) FROM halachot WHERE review_status IN ('approved', 'published')"
+        )
+
+    by_practice_area = [
+        {"practice_area": record["practice_area"], "count": int(record["n"])}
+        for record in practice_rows
+    ]
+    by_precedent_level = [
+        {"precedent_level": record["precedent_level"], "count": int(record["n"])}
+        for record in level_rows
+    ]
+    # A missing count is reported as 0.
+    return {
+        "precedents_total": int(precedents_total or 0),
+        "by_practice_area": by_practice_area,
+        "by_precedent_level": by_precedent_level,
+        "halachot_total": int(all_halachot or 0),
+        "halachot_pending": int(pending_halachot or 0),
+        "halachot_approved": int(approved_halachot or 0),
+    }