feat: external precedent library with auto halacha extraction

Adds a third corpus of legal authority distinct from style_corpus (Daphna's prior decisions for voice) and case_precedents (chair-attached quotes per case). The new corpus holds chair-uploaded court rulings and other appeals committee decisions, with binding rules (הלכות) extracted automatically and queued for chair approval. Pipeline (web/app.py + services/precedent_library.py): file → extract → chunk → Voyage embed → halacha_extractor → store + publish progress over the existing Redis SSE channel. Schema V7 (services/db.py): extends case_law with source_kind + extraction status fields under a CHECK constraint pinning practice_area to the three appeals committee domains (rishuy_uvniya, betterment_levy, compensation_197). New precedent_chunks (vector(1024)) and halachot tables (vector(1024) over rule_statement, IVFFlat indexes, gin on practice_areas/subject_tags). Halachot start as pending_review; only approved/published rows are visible to search_precedent_library. Agents: legal-writer, legal-researcher, legal-analyst, legal-ceo, legal-qa get search_precedent_library. legal-writer prompt explains the three-corpus distinction and CREAC use; legal-qa now verifies that every cited halacha resolves to an approved row in the corpus. UI: /precedents page with four tabs — library / semantic search / pending review (J/K nav, A/R/E shortcuts, badge count) / stats. Reuses the existing upload-sheet progress + SSE pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:38:18 +00:00
parent a6edb75bbf
commit 7ee90dce31
23 changed files with 3853 additions and 67 deletions
--- a/mcp-server/src/legal_mcp/server.py
+++ b/mcp-server/src/legal_mcp/server.py
@@ -47,6 +47,7 @@ mcp = FastMCP(

 from legal_mcp.tools import (  # noqa: E402
    cases, documents, search, drafting, workflow, precedents,
+    precedent_library as plib,
 )


@@ -142,10 +143,114 @@ async def precedent_remove(precedent_id: str) -> str:
 async def precedent_search_library(
    query: str, practice_area: str = "", limit: int = 10,
 ) -> str:
-    """חיפוש בספרייה הרוחבית של ציטוטים שנצברו בין תיקים."""
+    """חיפוש בציטוטים שדפנה צירפה ידנית לתיקים בעבר (case_precedents).
+    שונה מ-search_precedent_library שמחפש בקורפוס הפסיקה הסמכותית."""
    return await precedents.precedent_search_library(query, practice_area, limit)


+# ── External Precedent Library — authoritative case-law corpus ─────
+# Distinct from precedent_search_library above (chair-attached quotes)
+# and from search_decisions (Daphna's style corpus).
+
+
+@mcp.tool()
+async def precedent_library_upload(
+    file_path: str,
+    citation: str,
+    case_name: str = "",
+    court: str = "",
+    decision_date: str = "",
+    source_type: str = "",
+    precedent_level: str = "",
+    practice_area: str = "",
+    appeal_subtype: str = "",
+    subject_tags: list[str] | None = None,
+    is_binding: bool = True,
+    headnote: str = "",
+    summary: str = "",
+) -> str:
+    """העלאת פסיקה חיצונית (פס"ד / החלטה של ועדה אחרת) לקורפוס הסמכותי. מחלץ הלכות אוטומטית — כולן ממתינות לאישור היו"ר. practice_area: rishuy_uvniya / betterment_levy / compensation_197."""
+    return await plib.precedent_library_upload(
+        file_path, citation, case_name, court, decision_date,
+        source_type, precedent_level, practice_area, appeal_subtype,
+        subject_tags, is_binding, headnote, summary,
+    )
+
+
+@mcp.tool()
+async def precedent_library_list(
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    source_type: str = "",
+    search: str = "",
+    limit: int = 100,
+) -> str:
+    """רשימת הפסיקה בקורפוס הסמכותי, עם פילטרים."""
+    return await plib.precedent_library_list(
+        practice_area, court, precedent_level, source_type, search, limit,
+    )
+
+
+@mcp.tool()
+async def precedent_library_get(case_law_id: str) -> str:
+    """פסיקה ספציפית בקורפוס + רשימת ההלכות שחולצו ממנה (כולל ממתינות לאישור)."""
+    return await plib.precedent_library_get(case_law_id)
+
+
+@mcp.tool()
+async def precedent_library_delete(case_law_id: str) -> str:
+    """מחיקת פסיקה מהקורפוס (cascade: chunks + halachot)."""
+    return await plib.precedent_library_delete(case_law_id)
+
+
+@mcp.tool()
+async def precedent_extract_halachot(case_law_id: str) -> str:
+    """הרצה מחדש של חילוץ הלכות לפסיקה קיימת. ההלכות הקיימות נמחקות, החדשות חוזרות לסטטוס pending_review."""
+    return await plib.precedent_extract_halachot(case_law_id)
+
+
+@mcp.tool()
+async def search_precedent_library(
+    query: str,
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    appeal_subtype: str = "",
+    subject_tag: str = "",
+    limit: int = 10,
+    include_halachot: bool = True,
+) -> str:
+    """חיפוש סמנטי בקורפוס הפסיקה הסמכותית. מחזיר הלכות (מאושרות בלבד) + קטעי טקסט. השתמש כש-legal-writer צריך לצטט פסיקה מחייבת בבלוק י (CREAC: rule + explanation)."""
+    return await plib.search_precedent_library(
+        query, practice_area, court, precedent_level, appeal_subtype,
+        None, subject_tag, limit, include_halachot,
+    )
+
+
+@mcp.tool()
+async def halacha_review(
+    halacha_id: str,
+    status: str,
+    reviewer: str = "דפנה",
+    rule_statement: str = "",
+    reasoning_summary: str = "",
+    subject_tags: list[str] | None = None,
+    practice_areas: list[str] | None = None,
+) -> str:
+    """אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. status: pending_review / approved / rejected / published."""
+    return await plib.halacha_review(
+        halacha_id, status, reviewer, rule_statement, reasoning_summary,
+        subject_tags, practice_areas,
+    )
+
+
+@mcp.tool()
+async def halachot_pending(limit: int = 100) -> str:
+    """תור ההלכות הממתינות לאישור."""
+    return await plib.halachot_pending(limit)
+
+
 # Documents
@mcp.tool()
 async def document_upload(
--- a/mcp-server/src/legal_mcp/services/chunker.py
+++ b/mcp-server/src/legal_mcp/services/chunker.py
@@ -7,14 +7,16 @@ from dataclasses import dataclass, field

 from legal_mcp import config

-# Hebrew legal section headers
+# Hebrew legal section headers.
+# Covers both appeals committee decisions and external court rulings —
+# court rulings use slightly different vocabulary (פסק דין, נימוקים, סוף דבר).
 SECTION_PATTERNS = [
    (r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע", "facts"),
    (r"טענות\s*העוררי[םן]|טענות\s*המערערי[םן]|עיקר\s*טענות\s*העוררי[םן]", "appellant_claims"),
    (r"טענות\s*המשיבי[םן]|תשובת\s*המשיבי[םן]|עיקר\s*טענות\s*המשיבי[םן]", "respondent_claims"),
-    (r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית", "legal_analysis"),
-    (r"מסקנ[הות]|סיכום", "conclusion"),
-    (r"החלטה|לפיכך\s*אני\s*מחליט|התוצאה", "ruling"),
+    (r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|נימוקים", "legal_analysis"),
+    (r"מסקנ[הות]|סיכום|סוף\s*דבר", "conclusion"),
+    (r"פסק[- ]?דין|החלטה|לפיכך\s*אני\s*מחליט|התוצאה", "ruling"),
    (r"מבוא|פתיחה|לפניי", "intro"),
 ]

--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -518,6 +518,91 @@ CREATE INDEX IF NOT EXISTS idx_cases_archived ON cases(archived_at) WHERE archiv
 """


+# ── V7: External Precedent Library + halacha extraction ──────────
+# Chair-uploaded external court rulings and other appeals committee decisions
+# become an authoritative law corpus. Distinct from style_corpus (Daphna's
+# style) and case_precedents (chair-attached quotes scoped to a single case).
+
+SCHEMA_V7_SQL = """
+-- case_law extensions: distinguish chair-uploaded full rulings from
+-- auto-extracted citation stubs, and track ingestion progress.
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_kind TEXT DEFAULT 'cited_only';
+    -- 'external_upload' (chair uploaded full ruling) | 'cited_only' (stub from
+    -- references_extractor) | 'nevo_seed' (future: auto-fetched from Nevo).
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS document_id UUID REFERENCES documents(id) ON DELETE SET NULL;
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS extraction_status TEXT DEFAULT 'pending';
+    -- 'pending' | 'processing' | 'completed' | 'failed'
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_status TEXT DEFAULT 'pending';
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT '';
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS headnote TEXT DEFAULT '';
+    -- chair-editable abstract shown in search results.
+ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_type TEXT DEFAULT '';
+    -- 'court_ruling' | 'appeals_committee'
+
+-- practice_area is closed to the three appeals committee domains.
+DO $$ BEGIN
+    ALTER TABLE case_law ADD CONSTRAINT case_law_practice_area_check
+        CHECK (practice_area IN ('', 'rishuy_uvniya', 'betterment_levy', 'compensation_197'));
+EXCEPTION WHEN duplicate_object THEN NULL; END $$;
+
+CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
+CREATE INDEX IF NOT EXISTS idx_case_law_practice ON case_law(practice_area, appeal_subtype);
+
+-- precedent_chunks: full-text chunks of an uploaded ruling, with embeddings.
+-- Analog of document_chunks for case_law rows where source_kind='external_upload'.
+CREATE TABLE IF NOT EXISTS precedent_chunks (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
+    chunk_index INTEGER NOT NULL,
+    content TEXT NOT NULL,
+    section_type TEXT DEFAULT 'other',
+        -- intro | facts | legal_analysis | ruling | conclusion | other
+    page_number INTEGER,
+    embedding vector(1024),
+    created_at TIMESTAMPTZ DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_precedent_chunks_case_law ON precedent_chunks(case_law_id);
+CREATE INDEX IF NOT EXISTS idx_precedent_chunks_section ON precedent_chunks(case_law_id, section_type);
+CREATE INDEX IF NOT EXISTS idx_precedent_chunks_vec
+    ON precedent_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
+
+-- halachot: extracted binding rules. One halacha = one rule + verbatim quote.
+-- Embedded separately for rule-precision semantic match (chunks centroid is
+-- dominated by surrounding context). All halachot start as pending_review;
+-- only approved/published rows are visible to search_precedent_library.
+CREATE TABLE IF NOT EXISTS halachot (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
+    halacha_index INTEGER NOT NULL,
+    rule_statement TEXT NOT NULL,
+    rule_type TEXT DEFAULT 'binding',
+        -- binding | interpretive | procedural | obiter
+    reasoning_summary TEXT DEFAULT '',
+    supporting_quote TEXT NOT NULL,
+    page_reference TEXT DEFAULT '',
+    practice_areas TEXT[] DEFAULT '{}',
+    subject_tags TEXT[] DEFAULT '{}',
+    cites TEXT[] DEFAULT '{}',
+    confidence NUMERIC(3,2) DEFAULT 0.0,
+    quote_verified BOOLEAN DEFAULT FALSE,
+    review_status TEXT DEFAULT 'pending_review',
+        -- pending_review | approved | rejected | published
+    reviewer TEXT DEFAULT '',
+    reviewed_at TIMESTAMPTZ,
+    embedding vector(1024),
+    created_at TIMESTAMPTZ DEFAULT now(),
+    updated_at TIMESTAMPTZ DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_halachot_case_law ON halachot(case_law_id);
+CREATE INDEX IF NOT EXISTS idx_halachot_status ON halachot(review_status);
+CREATE INDEX IF NOT EXISTS idx_halachot_practice ON halachot USING gin(practice_areas);
+CREATE INDEX IF NOT EXISTS idx_halachot_tags ON halachot USING gin(subject_tags);
+CREATE INDEX IF NOT EXISTS idx_halachot_vec
+    ON halachot USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
+"""
+
+
 async def init_schema() -> None:
    pool = await get_pool()
    async with pool.acquire() as conn:
@@ -528,7 +613,8 @@ async def init_schema() -> None:
        await conn.execute(SCHEMA_V4_SQL)
        await conn.execute(SCHEMA_V5_SQL)
        await conn.execute(SCHEMA_V6_SQL)
-    logger.info("Database schema initialized (v1-v6)")
+        await conn.execute(SCHEMA_V7_SQL)
+    logger.info("Database schema initialized (v1-v7)")


 # ── Case CRUD ───────────────────────────────────────────────────────
@@ -1518,3 +1604,590 @@ async def detect_appraiser_conflicts(case_id: UUID) -> list[dict]:
            "entries": entries,
        })
    return conflicts
+
+
+# ── V7: External precedent library + halachot ─────────────────────
+
+
+def _row_to_case_law(row: asyncpg.Record) -> dict:
+    """Convert a case_law record into a plain dict.
+
+    A string-valued ``subject_tags`` is decoded as JSON (an empty list is
+    substituted when decoding fails), and a non-null ``date`` is replaced
+    by its ISO-format string. All other columns are copied unchanged.
+    """
+    record = dict(row)
+
+    raw_tags = record.get("subject_tags")
+    if isinstance(raw_tags, str):
+        try:
+            decoded = json.loads(raw_tags)
+        except (TypeError, ValueError):
+            decoded = []
+        record["subject_tags"] = decoded
+
+    decided_on = record.get("date")
+    if decided_on is not None:
+        record["date"] = decided_on.isoformat()
+    return record
+
+
+async def get_case_law(case_law_id: UUID) -> dict | None:
+    """Fetch a single case_law row by primary key.
+
+    Returns the row normalized by ``_row_to_case_law``, or ``None`` when
+    no row has this id.
+    """
+    pool = await get_pool()
+    record = await pool.fetchrow(
+        "SELECT * FROM case_law WHERE id = $1", case_law_id,
+    )
+    if not record:
+        return None
+    return _row_to_case_law(record)
+
+
+async def get_case_law_by_citation(case_number: str) -> dict | None:
+    """Fetch a case_law row whose ``case_number`` equals the given citation.
+
+    The comparison is an exact string match. Returns the row normalized
+    by ``_row_to_case_law``, or ``None`` when nothing matches.
+    """
+    pool = await get_pool()
+    record = await pool.fetchrow(
+        "SELECT * FROM case_law WHERE case_number = $1", case_number,
+    )
+    if not record:
+        return None
+    return _row_to_case_law(record)
+
+
+async def create_external_case_law(
+    case_number: str,
+    case_name: str,
+    full_text: str,
+    court: str = "",
+    decision_date: date | None = None,
+    practice_area: str = "",
+    appeal_subtype: str = "",
+    subject_tags: list[str] | None = None,
+    summary: str = "",
+    headnote: str = "",
+    key_quote: str = "",
+    source_url: str = "",
+    source_type: str = "",
+    precedent_level: str = "",
+    is_binding: bool = True,
+    document_id: UUID | None = None,
+) -> dict:
+    """Insert a chair-uploaded external precedent into case_law, or
+    overwrite the row that already carries this ``case_number``.
+
+    Existing row (any ``source_kind`` — the lookup does not filter on it,
+    so both auto-discovered 'cited_only' stubs and earlier uploads are
+    overwritten):
+      * case_name, practice_area, appeal_subtype, subject_tags, headnote,
+        full_text, source_type, precedent_level and is_binding are always
+        replaced by the arguments;
+      * court, summary, key_quote and source_url keep their stored value
+        when the argument is an empty string;
+      * date and document_id keep their stored value when the argument
+        is ``None``;
+      * source_kind becomes 'external_upload', extraction_status becomes
+        'processing' and halacha_extraction_status becomes 'pending'.
+
+    No existing row: a new one is inserted with the same three status
+    values.
+
+    ``subject_tags`` is stored as a JSON-encoded list (``None`` becomes
+    ``[]``). Returns the written row, normalized by ``_row_to_case_law``.
+
+    NOTE(review): the lookup and the write are two separate statements
+    with no explicit transaction, and no uniqueness guarantee on
+    ``case_number`` is visible from this function — confirm that
+    concurrent uploads of the same citation cannot create duplicates.
+    """
+    pool = await get_pool()
+    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
+    async with pool.acquire() as conn:
+        # source_kind is selected here but not consulted below.
+        existing = await conn.fetchrow(
+            "SELECT id, source_kind FROM case_law WHERE case_number = $1",
+            case_number,
+        )
+        if existing:
+            # Overwrite in place; COALESCE/NULLIF keep the stored value
+            # for the fields whose argument was left empty.
+            row = await conn.fetchrow(
+                """
+                UPDATE case_law SET
+                    case_name = $2,
+                    court = COALESCE(NULLIF($3, ''), court),
+                    date = COALESCE($4, date),
+                    practice_area = $5,
+                    appeal_subtype = $6,
+                    subject_tags = $7,
+                    summary = COALESCE(NULLIF($8, ''), summary),
+                    headnote = $9,
+                    key_quote = COALESCE(NULLIF($10, ''), key_quote),
+                    full_text = $11,
+                    source_url = COALESCE(NULLIF($12, ''), source_url),
+                    source_type = $13,
+                    precedent_level = $14,
+                    is_binding = $15,
+                    document_id = COALESCE($16, document_id),
+                    source_kind = 'external_upload',
+                    extraction_status = 'processing',
+                    halacha_extraction_status = 'pending'
+                WHERE id = $1
+                RETURNING *
+                """,
+                existing["id"], case_name, court, decision_date,
+                practice_area, appeal_subtype, tags_json, summary, headnote,
+                key_quote, full_text, source_url, source_type,
+                precedent_level, is_binding, document_id,
+            )
+        else:
+            # Fresh row: the status columns are literals in the statement,
+            # so the bound arguments skip over them ($10 is document_id).
+            row = await conn.fetchrow(
+                """
+                INSERT INTO case_law (
+                    case_number, case_name, court, date, subject_tags,
+                    summary, key_quote, full_text, source_url,
+                    source_kind, document_id, extraction_status,
+                    halacha_extraction_status, practice_area, appeal_subtype,
+                    headnote, source_type, precedent_level, is_binding
+                ) VALUES (
+                    $1, $2, $3, $4, $5, $6, $7, $8, $9,
+                    'external_upload', $10, 'processing', 'pending',
+                    $11, $12, $13, $14, $15, $16
+                )
+                RETURNING *
+                """,
+                case_number, case_name, court, decision_date, tags_json,
+                summary, key_quote, full_text, source_url,
+                document_id, practice_area, appeal_subtype, headnote,
+                source_type, precedent_level, is_binding,
+            )
+    return _row_to_case_law(row)
+
+
+async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
+    """Apply a partial metadata update to one case_law row.
+
+    Only these keyword names are honoured; anything else is ignored:
+    case_name, court, date, practice_area, appeal_subtype, subject_tags,
+    summary, headnote, key_quote, source_url, source_type,
+    precedent_level, is_binding.
+
+    ``subject_tags`` is JSON-encoded before being bound (a falsy value
+    becomes ``[]``). When no honoured field is supplied the current row
+    is returned unchanged. Returns ``None`` when no row has this id.
+    """
+    allowed = {
+        "case_name", "court", "date", "practice_area", "appeal_subtype",
+        "subject_tags", "summary", "headnote", "key_quote", "source_url",
+        "source_type", "precedent_level", "is_binding",
+    }
+    patch = {name: value for name, value in fields.items() if name in allowed}
+    if not patch:
+        return await get_case_law(case_law_id)
+
+    pool = await get_pool()
+    if "subject_tags" in patch:
+        patch["subject_tags"] = json.dumps(
+            patch["subject_tags"] or [], ensure_ascii=False,
+        )
+    # $1 is the row id, so column placeholders are numbered from $2.
+    assignments = ", ".join(
+        f"{column} = ${slot}" for slot, column in enumerate(patch, start=2)
+    )
+    sql = f"UPDATE case_law SET {assignments} WHERE id = $1 RETURNING *"
+    record = await pool.fetchrow(sql, case_law_id, *patch.values())
+    if not record:
+        return None
+    return _row_to_case_law(record)
+
+
+async def set_case_law_extraction_status(case_law_id: UUID, status: str) -> None:
+    """Write ``status`` into ``extraction_status`` of one case_law row."""
+    statement = "UPDATE case_law SET extraction_status = $2 WHERE id = $1"
+    pool = await get_pool()
+    await pool.execute(statement, case_law_id, status)
+
+
+async def set_case_law_halacha_status(case_law_id: UUID, status: str) -> None:
+    """Write ``status`` into ``halacha_extraction_status`` of one case_law row."""
+    statement = "UPDATE case_law SET halacha_extraction_status = $2 WHERE id = $1"
+    pool = await get_pool()
+    await pool.execute(statement, case_law_id, status)
+
+
+async def list_external_case_law(
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    source_type: str = "",
+    search: str = "",
+    limit: int = 100,
+    offset: int = 0,
+) -> list[dict]:
+    """List chair-uploaded precedents (``source_kind = 'external_upload'``).
+
+    Every filter is optional; an empty string disables it.
+    ``practice_area``, ``precedent_level`` and ``source_type`` are exact
+    matches, ``court`` is a case-insensitive substring match, and
+    ``search`` is a case-insensitive substring matched against
+    case_number, case_name, summary and headnote.
+
+    Rows are ordered newest first by ``created_at`` and paged with
+    ``limit`` / ``offset``. Each returned dict is normalized by
+    ``_row_to_case_law`` and carries ``halachot_count`` (all halachot of
+    the precedent) and ``approved_count`` (those whose review_status is
+    'approved' or 'published').
+    """
+    pool = await get_pool()
+    conditions = ["source_kind = 'external_upload'"]
+    params: list = []
+    idx = 1  # number of the next positional placeholder ($1, $2, ...)
+    if practice_area:
+        conditions.append(f"practice_area = ${idx}")
+        params.append(practice_area)
+        idx += 1
+    if court:
+        # NOTE(review): % and _ inside the value are not escaped, so they
+        # act as LIKE wildcards — confirm this is acceptable.
+        conditions.append(f"court ILIKE ${idx}")
+        params.append(f"%{court}%")
+        idx += 1
+    if precedent_level:
+        conditions.append(f"precedent_level = ${idx}")
+        params.append(precedent_level)
+        idx += 1
+    if source_type:
+        conditions.append(f"source_type = ${idx}")
+        params.append(source_type)
+        idx += 1
+    if search:
+        # The same placeholder is referenced four times, so the pattern
+        # is bound only once.
+        conditions.append(
+            f"(case_number ILIKE ${idx} OR case_name ILIKE ${idx} "
+            f"OR summary ILIKE ${idx} OR headnote ILIKE ${idx})"
+        )
+        params.append(f"%{search}%")
+        idx += 1
+    where_sql = " AND ".join(conditions)
+    # LIMIT and OFFSET are always the last two bound parameters.
+    params.extend([limit, offset])
+    sql = f"""
+        SELECT id, case_number, case_name, court, date, practice_area,
+               appeal_subtype, source_type, precedent_level, is_binding,
+               summary, headnote, subject_tags, source_kind,
+               extraction_status, halacha_extraction_status,
+               created_at,
+               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id) AS halachot_count,
+               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id
+                  AND h.review_status IN ('approved', 'published')) AS approved_count
+        FROM case_law
+        WHERE {where_sql}
+        ORDER BY created_at DESC
+        LIMIT ${idx} OFFSET ${idx + 1}
+    """
+    rows = await pool.fetch(sql, *params)
+    return [_row_to_case_law(r) for r in rows]
+
+
+async def delete_case_law(case_law_id: UUID) -> bool:
+    """Delete one precedent row from case_law.
+
+    precedent_chunks and halachot reference case_law with
+    ON DELETE CASCADE, so their rows for this precedent go with it.
+    Returns ``True`` only when the command status reports exactly one
+    deleted row.
+    """
+    pool = await get_pool()
+    status_tag = await pool.execute(
+        "DELETE FROM case_law WHERE id = $1", case_law_id,
+    )
+    deleted_exactly_one = status_tag == "DELETE 1"
+    return deleted_exactly_one
+
+
+async def store_precedent_chunks(
+    case_law_id: UUID, chunks: list[dict],
+) -> int:
+    """Atomically replace the precedent chunks of a case_law row.
+
+    Each chunk dict has: chunk_index, content, section_type, page_number,
+    embedding (list[float] or None). ``section_type`` defaults to
+    'other'; ``page_number`` and ``embedding`` default to ``None``.
+
+    The delete of the old chunks and the insert of the new ones run in
+    one transaction, so a failing insert rolls everything back and the
+    previous chunks stay in place instead of being left half-replaced.
+
+    Returns the number of chunks passed in.
+    Raises ``KeyError`` when a chunk lacks ``chunk_index`` or ``content``;
+    this happens before the database is touched.
+    """
+    # Build every parameter tuple up front so malformed input fails
+    # before any existing chunk has been deleted.
+    rows = [
+        (
+            case_law_id,
+            c["chunk_index"],
+            c["content"],
+            c.get("section_type", "other"),
+            c.get("page_number"),
+            c.get("embedding"),
+        )
+        for c in chunks
+    ]
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        async with conn.transaction():
+            await conn.execute(
+                "DELETE FROM precedent_chunks WHERE case_law_id = $1",
+                case_law_id,
+            )
+            if rows:
+                await conn.executemany(
+                    """INSERT INTO precedent_chunks
+                       (case_law_id, chunk_index, content, section_type,
+                        page_number, embedding)
+                       VALUES ($1, $2, $3, $4, $5, $6)""",
+                    rows,
+                )
+    return len(chunks)
+
+
+async def list_precedent_chunks(
+    case_law_id: UUID,
+    section_types: tuple[str, ...] | None = None,
+) -> list[dict]:
+    """Return the stored chunks of a precedent, ordered by chunk_index.
+
+    When ``section_types`` is non-empty only chunks whose section_type is
+    one of the given values are returned; ``None`` or an empty tuple
+    returns every chunk. The embedding column is not selected.
+    """
+    pool = await get_pool()
+    if not section_types:
+        records = await pool.fetch(
+            """SELECT id, chunk_index, content, section_type, page_number
+               FROM precedent_chunks
+               WHERE case_law_id = $1
+               ORDER BY chunk_index""",
+            case_law_id,
+        )
+    else:
+        records = await pool.fetch(
+            """SELECT id, chunk_index, content, section_type, page_number
+               FROM precedent_chunks
+               WHERE case_law_id = $1 AND section_type = ANY($2::text[])
+               ORDER BY chunk_index""",
+            case_law_id, list(section_types),
+        )
+    return list(map(dict, records))
+
+
+async def delete_halachot(case_law_id: UUID) -> int:
+    """Delete every halacha of a precedent and return how many were removed.
+
+    The count is read from the last token of the command status
+    (e.g. "DELETE 5"); 0 is returned when that token is missing or is
+    not an integer.
+    """
+    pool = await get_pool()
+    status_tag = await pool.execute(
+        "DELETE FROM halachot WHERE case_law_id = $1", case_law_id,
+    )
+    tokens = status_tag.split()
+    if not tokens:
+        return 0
+    try:
+        return int(tokens[-1])
+    except ValueError:
+        return 0
+
+
+async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
+    """Insert extracted halachot for a precedent, all as 'pending_review'.
+
+    Each halacha dict must have ``rule_statement`` and
+    ``supporting_quote``. Optional keys and their defaults: rule_type
+    ('binding'), reasoning_summary (''), page_reference (''),
+    practice_areas ([]), subject_tags ([]), cites ([]), confidence (0.0),
+    quote_verified (False), embedding (None). ``halacha_index`` is the
+    position of the dict in the list, starting at 0.
+
+    Existing halachot of the precedent are not removed here. All rows
+    are written in one transaction, so a failure on any row leaves no
+    partial set behind.
+
+    Returns the number of halachot inserted (0 for an empty list, in
+    which case the database is not touched).
+    Raises ``KeyError`` when a required key is missing; this happens
+    before the database is touched.
+    """
+    if not halachot:
+        return 0
+    # Build every parameter tuple up front so malformed input fails
+    # before anything is written.
+    rows = [
+        (
+            case_law_id,
+            position,
+            h["rule_statement"],
+            h.get("rule_type", "binding"),
+            h.get("reasoning_summary", ""),
+            h["supporting_quote"],
+            h.get("page_reference", ""),
+            h.get("practice_areas", []),
+            h.get("subject_tags", []),
+            h.get("cites", []),
+            h.get("confidence", 0.0),
+            h.get("quote_verified", False),
+            h.get("embedding"),
+        )
+        for position, h in enumerate(halachot)
+    ]
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        async with conn.transaction():
+            await conn.executemany(
+                """INSERT INTO halachot
+                   (case_law_id, halacha_index, rule_statement, rule_type,
+                    reasoning_summary, supporting_quote, page_reference,
+                    practice_areas, subject_tags, cites, confidence,
+                    quote_verified, embedding, review_status)
+                   VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
+                           $12, $13, 'pending_review')""",
+                rows,
+            )
+    return len(halachot)
+
+
+async def list_halachot(
+    case_law_id: UUID | None = None,
+    review_status: str | None = None,
+    practice_area: str | None = None,
+    limit: int = 200,
+    offset: int = 0,
+) -> list[dict]:
+    """List halachot together with citation metadata of their precedent.
+
+    All filters are optional and combined with AND: ``case_law_id``
+    (exact), ``review_status`` (exact) and ``practice_area`` (must be an
+    element of the halacha's practice_areas array). With no filter every
+    halacha is eligible.
+
+    Rows are ordered by case_law_id, then halacha_index, and paged with
+    ``limit`` / ``offset``. Each dict also carries case_number,
+    case_name, court, precedent_level and ``decision_date`` (ISO-format
+    string when present) from case_law. The embedding column is not
+    selected.
+    """
+    pool = await get_pool()
+    conditions = []
+    params: list = []
+    idx = 1  # number of the next positional placeholder ($1, $2, ...)
+    if case_law_id is not None:
+        conditions.append(f"h.case_law_id = ${idx}")
+        params.append(case_law_id)
+        idx += 1
+    if review_status:
+        conditions.append(f"h.review_status = ${idx}")
+        params.append(review_status)
+        idx += 1
+    if practice_area:
+        conditions.append(f"${idx} = ANY(h.practice_areas)")
+        params.append(practice_area)
+        idx += 1
+    # Omit the WHERE keyword entirely when no filter was requested.
+    where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
+    # LIMIT and OFFSET are always the last two bound parameters.
+    params.extend([limit, offset])
+    sql = f"""
+        SELECT h.id, h.case_law_id, h.halacha_index, h.rule_statement,
+               h.rule_type, h.reasoning_summary, h.supporting_quote,
+               h.page_reference, h.practice_areas, h.subject_tags,
+               h.cites, h.confidence, h.quote_verified, h.review_status,
+               h.reviewer, h.reviewed_at, h.created_at, h.updated_at,
+               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
+               cl.precedent_level
+        FROM halachot h
+        LEFT JOIN case_law cl ON cl.id = h.case_law_id
+        {where_sql}
+        ORDER BY h.case_law_id, h.halacha_index
+        LIMIT ${idx} OFFSET ${idx + 1}
+    """
+    rows = await pool.fetch(sql, *params)
+    out = []
+    for r in rows:
+        d = dict(r)
+        # Only decision_date is converted to a string here.
+        if d.get("decision_date") is not None:
+            d["decision_date"] = d["decision_date"].isoformat()
+        out.append(d)
+    return out
+
+
+async def update_halacha(
+    halacha_id: UUID,
+    review_status: str | None = None,
+    reviewer: str = "",
+    rule_statement: str | None = None,
+    reasoning_summary: str | None = None,
+    subject_tags: list[str] | None = None,
+    practice_areas: list[str] | None = None,
+) -> dict | None:
+    """Apply a chair review action (approve / reject / edit) to a halacha.
+
+    Only arguments that are not ``None`` are written. When
+    ``review_status`` is 'approved', 'rejected' or 'published' the row is
+    also stamped with ``reviewed_at = now()`` and the given ``reviewer``;
+    for any other status those two columns are left as they are.
+    ``updated_at`` is refreshed on every write.
+
+    Returns the updated row as a dict, or ``None`` when there was nothing
+    to write or no row has this id.
+    """
+    pool = await get_pool()
+    assignments: list[str] = []
+    args: list = [halacha_id]
+
+    def bind(column: str, value) -> None:
+        # Bind ``value`` to the next free placeholder; $1 is the row id.
+        args.append(value)
+        assignments.append(f"{column} = ${len(args)}")
+
+    if review_status is not None:
+        bind("review_status", review_status)
+        if review_status in ("approved", "rejected", "published"):
+            assignments.append("reviewed_at = now()")
+            bind("reviewer", reviewer)
+
+    editable = (
+        ("rule_statement", rule_statement),
+        ("reasoning_summary", reasoning_summary),
+        ("subject_tags", subject_tags),
+        ("practice_areas", practice_areas),
+    )
+    for column, value in editable:
+        if value is not None:
+            bind(column, value)
+
+    if not assignments:
+        return None
+    assignments.append("updated_at = now()")
+    sql = f"UPDATE halachot SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
+    record = await pool.fetchrow(sql, *args)
+    if not record:
+        return None
+    return dict(record)
+
+
+async def search_precedent_library_semantic(
+    query_embedding: list[float],
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    appeal_subtype: str = "",
+    is_binding: bool | None = None,
+    subject_tag: str = "",
+    limit: int = 10,
+    include_halachot: bool = True,
+) -> list[dict]:
+    """Cosine-similarity search over halachot and precedent chunks.
+
+    Two queries are run and merged:
+      * halachot whose review_status is 'approved' or 'published'
+        (skipped when ``include_halachot`` is false); each hit is tagged
+        ``type='halacha'`` and its score is raised by 0.05;
+      * chunks of precedents with source_kind 'external_upload', tagged
+        ``type='passage'``; these are returned regardless of the review
+        status of the precedent's halachot.
+
+    Filters (empty string / ``None`` disables a filter): practice_area,
+    court (case-insensitive substring), precedent_level, appeal_subtype
+    and is_binding apply to both queries. ``subject_tag`` constrains the
+    halachot query only; passages are not filtered by tag.
+
+    Each query returns at most ``limit`` rows with a non-null embedding;
+    the merged list is sorted by score, highest first, and cut to
+    ``limit``. ``decision_date`` is an ISO-format string when present.
+    """
+    pool = await get_pool()
+    halacha_filters = ["h.review_status IN ('approved', 'published')"]
+    chunk_filters = ["cl.source_kind = 'external_upload'"]
+    # $1 is the query vector and $2 the row limit in both statements.
+    c_params: list = [query_embedding, limit]
+
+    # (enabled, bound value, halacha-side clause, chunk-side clause).
+    # Both statements receive these values at the same placeholder.
+    shared_filters = (
+        (bool(practice_area), practice_area,
+         "${n} = ANY(h.practice_areas)", "cl.practice_area = ${n}"),
+        (bool(court), f"%{court}%",
+         "cl.court ILIKE ${n}", "cl.court ILIKE ${n}"),
+        (bool(precedent_level), precedent_level,
+         "cl.precedent_level = ${n}", "cl.precedent_level = ${n}"),
+        (bool(appeal_subtype), appeal_subtype,
+         "cl.appeal_subtype = ${n}", "cl.appeal_subtype = ${n}"),
+        (is_binding is not None, is_binding,
+         "cl.is_binding = ${n}", "cl.is_binding = ${n}"),
+    )
+    for enabled, value, halacha_clause, chunk_clause in shared_filters:
+        if not enabled:
+            continue
+        c_params.append(value)
+        slot = len(c_params)
+        halacha_filters.append(halacha_clause.format(n=slot))
+        chunk_filters.append(chunk_clause.format(n=slot))
+
+    # The tag filter exists only on the halachot side, so it is bound
+    # after the shared parameters.
+    h_params: list = list(c_params)
+    if subject_tag:
+        h_params.append(subject_tag)
+        halacha_filters.append(f"${len(h_params)} = ANY(h.subject_tags)")
+
+    halacha_sql = f"""
+        SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
+               h.reasoning_summary, h.supporting_quote, h.page_reference,
+               h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
+               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
+               cl.precedent_level,
+               1 - (h.embedding <=> $1) AS score
+        FROM halachot h
+        JOIN case_law cl ON cl.id = h.case_law_id
+        WHERE {' AND '.join(halacha_filters)}
+          AND h.embedding IS NOT NULL
+        ORDER BY h.embedding <=> $1
+        LIMIT $2
+    """
+
+    chunk_sql = f"""
+        SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
+               pc.section_type, pc.page_number,
+               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
+               cl.precedent_level, cl.practice_area,
+               1 - (pc.embedding <=> $1) AS score
+        FROM precedent_chunks pc
+        JOIN case_law cl ON cl.id = pc.case_law_id
+        WHERE {' AND '.join(chunk_filters)}
+          AND pc.embedding IS NOT NULL
+        ORDER BY pc.embedding <=> $1
+        LIMIT $2
+    """
+
+    def _as_hit(record, kind: str) -> dict:
+        # Plain dict with an ISO date, a float score and a type tag.
+        hit = dict(record)
+        if hit.get("decision_date") is not None:
+            hit["decision_date"] = hit["decision_date"].isoformat()
+        hit["score"] = float(hit["score"])
+        hit["type"] = kind
+        return hit
+
+    results: list[dict] = []
+    if include_halachot:
+        halacha_rows = await pool.fetch(halacha_sql, *h_params)
+        for record in halacha_rows:
+            hit = _as_hit(record, "halacha")
+            hit["score"] += 0.05  # rule-level boost
+            results.append(hit)
+
+    chunk_rows = await pool.fetch(chunk_sql, *c_params)
+    for record in chunk_rows:
+        results.append(_as_hit(record, "passage"))
+
+    ranked = sorted(results, key=lambda hit: hit["score"], reverse=True)
+    return ranked[:limit]
+
+
+async def precedent_library_stats() -> dict:
+    """Collect the counters for the /precedents stats tab.
+
+    Returns a dict with: ``precedents_total`` (case_law rows with
+    source_kind 'external_upload'), ``by_practice_area`` and
+    ``by_precedent_level`` (per-value counts of those rows, largest
+    first), ``halachot_total``, ``halachot_pending`` (review_status
+    'pending_review') and ``halachot_approved`` ('approved' or
+    'published'). All six queries run on one pooled connection.
+    """
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        n_precedents = await conn.fetchval(
+            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'external_upload'"
+        )
+        practice_rows = await conn.fetch(
+            """SELECT practice_area, COUNT(*) AS n
+               FROM case_law
+               WHERE source_kind = 'external_upload'
+               GROUP BY practice_area
+               ORDER BY n DESC"""
+        )
+        level_rows = await conn.fetch(
+            """SELECT precedent_level, COUNT(*) AS n
+               FROM case_law
+               WHERE source_kind = 'external_upload'
+               GROUP BY precedent_level
+               ORDER BY n DESC"""
+        )
+        n_halachot = await conn.fetchval(
+            "SELECT COUNT(*) FROM halachot"
+        )
+        n_pending = await conn.fetchval(
+            "SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review'"
+        )
+        n_approved = await conn.fetchval(
+            "SELECT COUNT(*) FROM halachot WHERE review_status IN ('approved', 'published')"
+        )
+
+    practice_breakdown = []
+    for rec in practice_rows:
+        practice_breakdown.append(
+            {"practice_area": rec["practice_area"], "count": int(rec["n"])}
+        )
+    level_breakdown = []
+    for rec in level_rows:
+        level_breakdown.append(
+            {"precedent_level": rec["precedent_level"], "count": int(rec["n"])}
+        )
+
+    stats: dict = {"precedents_total": int(n_precedents or 0)}
+    stats["by_practice_area"] = practice_breakdown
+    stats["by_precedent_level"] = level_breakdown
+    stats["halachot_total"] = int(n_halachot or 0)
+    stats["halachot_pending"] = int(n_pending or 0)
+    stats["halachot_approved"] = int(n_approved or 0)
+    return stats
--- a/mcp-server/src/legal_mcp/services/halacha_extractor.py
+++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py
@@ -0,0 +1,326 @@
+"""Extract binding legal rules (הלכות) from external court rulings.
+
+Runs Claude (via the local headless ``claude -p`` bridge) over the
+legal_analysis / ruling / conclusion chunks of a precedent, returns a
+structured list of halachot, validates each one against the source text,
+embeds the rule statement, and stores everything as ``pending_review`` in
+the ``halachot`` table.
+
+All extraction is idempotent — calling ``extract(case_law_id)`` twice
+deletes prior rows for that precedent first.
+
+Trust model:
+    Per chair decision, NO halacha is auto-published. Every extracted
+    halacha enters with ``review_status='pending_review'``. The chair
+    approves/rejects via the UI, and only ``approved`` (or ``published``)
+    rows are visible to ``search_precedent_library`` and the writing
+    agents.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+from uuid import UUID
+
+from legal_mcp import config
+from legal_mcp.config import parse_llm_json
+from legal_mcp.services import claude_session, db, embeddings, proofreader
+
+logger = logging.getLogger(__name__)
+
+
+# Concurrency model mirrors claims_extractor — each ``claude -p`` subprocess
+# holds ~300 MB RSS, so we cap parallel chunks to keep the box healthy.
+CHUNK_CONCURRENCY = 3
+# Number of *extra* tries after the first call: _extract_chunk loops
+# CHUNK_RETRY_ATTEMPTS + 1 times, so each chunk gets at most two attempts.
+CHUNK_RETRY_ATTEMPTS = 1
+
+# Sections from which to extract. facts/intro/appellant_claims/respondent_claims
+# never contain holdings, only positions, so we skip them.
+EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
+
+
+HALACHA_EXTRACTION_PROMPT = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית.
+
+## הגדרות מחייבות
+
+הלכה (binding rule) = כלל משפטי שהפסק קובע או מאמץ ומיישם, באופן שניתן להסתמך עליו בהחלטות עתידיות.
+
+לא-הלכה (אין לחלץ):
+- אמרת אגב (obiter dicta) — הערות שאינן הכרחיות להכרעה.
+- ממצאים עובדתיים ספציפיים לתיק ("העורר לא הוכיח X").
+- ציטוטי הלכות מפסקי דין אחרים שלא אומצו במפורש בפסק זה.
+- הצהרות על דין קיים שאינן מיושמות בהכרעה.
+
+הבחנה קריטית: כאשר הפסק מצטט הלכה מפסק קודם, חלץ אותה רק אם בית המשפט בפסק הנוכחי **מאמץ ומחיל** אותה (לא רק מזכיר אותה ברקע).
+
+## תחומים אפשריים (practice_areas) — תחומי ועדת הערר בלבד
+- rishuy_uvniya — רישוי ובניה (תיקי 1xxx: היתרים, שימוש חורג, תכניות, קווי בניין, גובה, חניה)
+- betterment_levy — היטל השבחה (תיקי 8xxx: שומה, מערכות, תכניות המקנות בה, מועד קובע, סופיות ההחלטה)
+- compensation_197 — פיצויים לפי ס' 197 (תיקי 9xxx: פגיעה במקרקעין, ירידת ערך, ס' 200/פטור)
+
+הלכה אחת יכולה לחול על כמה תחומים — practice_areas הוא array ולא string יחיד.
+
+## סוגי הלכה (rule_type)
+- binding — הלכה מחייבת שהוחלה על התיק.
+- interpretive — פרשנות סעיף חוק/תכנית שאומצה.
+- procedural — כלל פרוצדורלי (סמכות, מועדים, הליכי שמיעה).
+- obiter — אמרת אגב חשובה (חלץ רק אם משמעותית; סמן confidence נמוך).
+
+## פלט נדרש
+החזר JSON array בלבד, ללא markdown, ללא הסברים. דוגמה:
+[
+  {
+    "rule_statement": "ניסוח הכלל בלשון משפטית מדויקת בגוף שלישי, 1-3 משפטים.",
+    "rule_type": "binding",
+    "reasoning_summary": "תמצית ההיגיון: למה בית המשפט הגיע לכלל הזה (1-2 משפטים).",
+    "supporting_quote": "ציטוט מילולי מדויק מהפסק התומך בכלל. חייב להופיע מילה במילה בטקסט הקלט.",
+    "page_reference": "פס' 12 / עמ' 8 — ככל שניתן לזהות מהקלט.",
+    "practice_areas": ["betterment_levy"],
+    "subject_tags": ["מועד_קביעת_שומה", "סופיות_ההחלטה"],
+    "cites": ["עע\\"מ 3975/22"],
+    "confidence": 0.85
+  }
+]
+
+## כללי איכות
+1. **נאמנות מוחלטת לציטוט** — supporting_quote חייב להיות הדבקה מדויקת מהקלט. אם אין ציטוט מתאים — אל תמציא הלכה.
+2. **מספר הלכות** — פסק רגיל מכיל 1-4 הלכות מחייבות. אל תמתח את הרשימה. אם אין הלכה — החזר [].
+3. **לא לפצל יתר על המידה** — אם שני סעיפים מבטאים את אותו עיקרון, אחד את הניסוח.
+4. **שפה** — rule_statement בעברית משפטית מקצועית, לא צמצום מילולי של הציטוט.
+5. **subject_tags** — 2-5 תגיות בעברית, snake_case (חניה, קווי_בניין, שיקול_דעת, פגם_פרוצדורלי, סמכות, מועדים, פגיעה_במקרקעין, ירידת_ערך).
+6. **confidence** — 0..1. מתחת ל-0.7 = ספק לגבי היות זה הלכה מחייבת.
+"""
+
+
+# Whitelists applied by _coerce_halacha: practice areas outside the first set
+# are dropped, and a rule type outside the second set falls back to "binding".
+_VALID_PRACTICE_AREAS = {"rishuy_uvniya", "betterment_levy", "compensation_197"}
+_VALID_RULE_TYPES = {"binding", "interpretive", "procedural", "obiter"}
+
+
+def _normalize_for_comparison(text: str) -> str:
+    """Return ``text`` in the canonical form used for quote matching.
+
+    Quote marks are unified by ``proofreader._fix_hebrew_quotes`` so the
+    result stays consistent with the proofreader pipeline; afterwards every
+    run of whitespace (newlines, tabs, repeated spaces) becomes one space
+    and the ends are trimmed.
+    """
+    with_unified_quotes = proofreader._fix_hebrew_quotes(text)
+    single_spaced = re.sub(r"\s+", " ", with_unified_quotes)
+    return single_spaced.strip()
+
+
+def _verify_quote(supporting_quote: str, full_text: str) -> bool:
+    """Check that ``supporting_quote`` occurs in ``full_text``.
+
+    Both sides are passed through ``_normalize_for_comparison`` first. An
+    exact substring hit is accepted. Because the LLM occasionally trims or
+    alters a word at either edge of the quote, quotes of 30+ normalized
+    characters get a second chance: roughly 5% (at least two characters) is
+    dropped from each end and the remaining core must appear contiguously
+    in the source.
+
+    Returns:
+        True on an exact or core match; False otherwise, including for
+        blank quotes.
+    """
+    if not supporting_quote.strip():
+        return False
+
+    needle = _normalize_for_comparison(supporting_quote)
+    haystack = _normalize_for_comparison(full_text)
+    if not needle:
+        return False
+    if needle in haystack:
+        return True
+
+    # Short quotes get no fuzzy fallback: trimming them would leave too
+    # little text for a meaningful match.
+    if len(needle) < 30:
+        return False
+    margin = max(2, len(needle) // 20)
+    core = needle[margin:-margin]
+    return bool(core) and core in haystack
+
+
+def _coerce_halacha(raw: dict) -> dict | None:
+    """Validate and normalize one LLM-returned halacha dict.
+
+    The LLM output is untrusted JSON, so every field is type-checked before
+    use; a malformed field degrades to its default instead of raising and
+    aborting the whole extraction run.
+
+    Args:
+        raw: One element of the JSON array returned by the extractor prompt.
+
+    Returns:
+        A dict with ``rule_statement``, ``rule_type``, ``reasoning_summary``,
+        ``supporting_quote``, ``page_reference``, ``practice_areas``,
+        ``subject_tags``, ``cites`` and ``confidence`` — or ``None`` when
+        ``raw`` is not a dict or lacks a textual ``rule_statement`` /
+        ``supporting_quote``.
+    """
+    if not isinstance(raw, dict):
+        return None
+
+    # Required fields must be real strings; anything else counts as missing.
+    rule_statement = _halacha_text(raw.get("rule_statement"))
+    supporting_quote = _halacha_text(raw.get("supporting_quote"))
+    if not rule_statement or not supporting_quote:
+        return None
+
+    # Unknown, empty or non-string rule types fall back to "binding".
+    rule_type = _halacha_text(raw.get("rule_type")).lower()
+    if rule_type not in _VALID_RULE_TYPES:
+        rule_type = "binding"
+
+    practice_areas = [
+        area
+        for area in _halacha_str_list(raw.get("practice_areas"))
+        if area in _VALID_PRACTICE_AREAS
+    ]
+
+    try:
+        confidence = float(raw.get("confidence", 0.0))
+    except (TypeError, ValueError, OverflowError):
+        confidence = 0.0
+    # NaN is the only float unequal to itself. Without this check
+    # min(1.0, nan) yields 1.0, i.e. a garbage score becomes full confidence.
+    if confidence != confidence:
+        confidence = 0.0
+    confidence = max(0.0, min(1.0, confidence))
+
+    return {
+        "rule_statement": rule_statement,
+        "rule_type": rule_type,
+        "reasoning_summary": _halacha_text(raw.get("reasoning_summary")),
+        "supporting_quote": supporting_quote,
+        # Models sometimes return a bare page/paragraph number here.
+        "page_reference": _halacha_text(
+            raw.get("page_reference"), allow_numbers=True,
+        ),
+        "practice_areas": practice_areas,
+        "subject_tags": _halacha_str_list(raw.get("subject_tags")),
+        "cites": _halacha_str_list(raw.get("cites")),
+        "confidence": confidence,
+    }
+
+
+def _halacha_text(value, *, allow_numbers: bool = False) -> str:
+    """Return ``value`` as a stripped string, or ``""`` if it is not text.
+
+    With ``allow_numbers`` set, ints and floats (but not bools) are
+    stringified instead of being discarded.
+    """
+    if isinstance(value, str):
+        return value.strip()
+    if (
+        allow_numbers
+        and isinstance(value, (int, float))
+        and not isinstance(value, bool)
+    ):
+        return str(value)
+    return ""
+
+
+def _halacha_str_list(value) -> list[str]:
+    """Normalize a string-or-list field to a list of non-empty strings.
+
+    A bare string becomes a one-element list. Anything that is not a
+    string, list or tuple yields ``[]``. Elements that are neither text nor
+    numbers (``None``, nested lists, dicts) are dropped.
+    """
+    if isinstance(value, str):
+        value = [value]
+    elif not isinstance(value, (list, tuple)):
+        return []
+    cleaned = (_halacha_text(item, allow_numbers=True) for item in value)
+    return [item for item in cleaned if item]
+
+
+async def _extract_chunk(
+    chunk_text: str,
+    section_type: str,
+    chunk_index: int,
+    chunk_total: int,
+    context: str,
+) -> list[dict]:
+    """Ask the LLM for the halachot in a single chunk, retrying on failure.
+
+    The prompt is the shared extraction prompt followed by the section
+    type, the source context line (with a part marker when the precedent
+    has more than one chunk) and the chunk text between delimiters.
+
+    Returns:
+        The list returned by ``claude_session.query_json``, or ``[]`` once
+        every attempt has raised or produced a non-list value. Failures
+        are logged and never propagated.
+    """
+    part_no = chunk_index + 1
+    chunk_label = ""
+    if chunk_total > 1:
+        chunk_label = f" (חלק {part_no}/{chunk_total})"
+    prompt = "".join([
+        f"{HALACHA_EXTRACTION_PROMPT}\n\n",
+        "## הקלט\n",
+        f"סוג קטע: {section_type}\n",
+        f"{context}{chunk_label}\n\n",
+        f"--- תחילת הטקסט ---\n{chunk_text}\n--- סוף הטקסט ---",
+    ])
+
+    max_attempts = CHUNK_RETRY_ATTEMPTS + 1
+    failure: Exception | None = None
+    for attempt_no in range(1, max_attempts + 1):
+        try:
+            answer = await claude_session.query_json(prompt)
+        except Exception as exc:
+            failure = exc
+            logger.warning(
+                "halacha_extractor chunk %d/%d attempt %d raised: %s",
+                part_no, chunk_total, attempt_no, exc,
+            )
+        else:
+            if isinstance(answer, list):
+                return answer
+            logger.warning(
+                "halacha_extractor chunk %d/%d attempt %d returned non-list (%s)",
+                part_no, chunk_total, attempt_no, type(answer).__name__,
+            )
+
+    # Only the most recent exception is reported; it stays None when every
+    # attempt returned a non-list instead of raising.
+    logger.error(
+        "halacha_extractor chunk %d/%d failed after %d attempts: %s",
+        part_no, chunk_total, max_attempts, failure,
+    )
+    return []
+
+
+async def extract(case_law_id: UUID | str) -> dict:
+    """Extract halachot from an uploaded precedent and store them.
+
+    Flow: load the precedent, fetch its chunks from ``EXTRACTABLE_SECTIONS``,
+    run the LLM over each chunk (at most ``CHUNK_CONCURRENCY`` at a time),
+    coerce every returned item, check its supporting quote against the
+    precedent's full text, embed the rule text, and store the result.
+
+    Idempotent in the sense that existing halachot for this precedent are
+    deleted before new ones are stored. This function sets no review status
+    itself; the ``pending_review`` default described in the module docstring
+    is presumably applied by ``db.store_halachot`` — confirm there.
+
+    NOTE(review): prior halachot are deleted *before* the LLM runs, so a run
+    in which every chunk fails leaves the precedent with no halachot at all.
+    NOTE(review): an exception raised after the status is set to
+    ``processing`` (e.g. from ``db.store_halachot``) leaves that status in
+    place; nothing here resets it.
+
+    Args:
+        case_law_id: Precedent id as a ``UUID`` or its string form.
+
+    Returns:
+        A dict with at least ``status``, ``extracted`` and ``stored``. The
+        ``completed`` result also carries ``valid`` and ``verified``.
+        ``status`` is one of ``not_found``, ``no_chunks``, ``no_halachot``,
+        ``no_valid_halachot`` or ``completed``.
+
+    Raises:
+        ValueError: If ``case_law_id`` is a string that is not a valid UUID.
+    """
+    if isinstance(case_law_id, str):
+        case_law_id = UUID(case_law_id)
+
+    record = await db.get_case_law(case_law_id)
+    if not record:
+        return {"status": "not_found", "extracted": 0, "stored": 0}
+
+    chunks = await db.list_precedent_chunks(
+        case_law_id, section_types=EXTRACTABLE_SECTIONS,
+    )
+    if not chunks:
+        await db.set_case_law_halacha_status(case_law_id, "completed")
+        return {"status": "no_chunks", "extracted": 0, "stored": 0}
+
+    await db.set_case_law_halacha_status(case_law_id, "processing")
+    await db.delete_halachot(case_law_id)
+
+    # ``or ""`` rather than a .get() default: a key that is present but
+    # None (NULL column) would otherwise be rendered as the literal "None"
+    # inside the prompt context.
+    citation = record.get("case_number") or ""
+    court = record.get("court") or ""
+    date_str = str(record.get("date") or "")
+    context = f"מקור: {citation} — {court}, {date_str}"
+
+    # Bound the number of concurrent LLM calls.
+    sem = asyncio.Semaphore(CHUNK_CONCURRENCY)
+
+    async def _bounded(idx: int, chunk_row: dict) -> list[dict]:
+        async with sem:
+            return await _extract_chunk(
+                chunk_row["content"], chunk_row["section_type"],
+                idx, len(chunks), context,
+            )
+
+    chunk_results = await asyncio.gather(
+        *[_bounded(i, c) for i, c in enumerate(chunks)]
+    )
+    raw_halachot: list[dict] = []
+    for items in chunk_results:
+        raw_halachot.extend(items)
+
+    if not raw_halachot:
+        await db.set_case_law_halacha_status(case_law_id, "completed")
+        return {"status": "no_halachot", "extracted": 0, "stored": 0}
+
+    # Validate against the full text of the precedent for the quote check.
+    full_text = record.get("full_text") or ""
+
+    cleaned: list[dict] = []
+    for raw in raw_halachot:
+        coerced = _coerce_halacha(raw)
+        if coerced is None:
+            continue
+        # Unverified quotes are kept and only flagged.
+        coerced["quote_verified"] = _verify_quote(
+            coerced["supporting_quote"], full_text,
+        )
+        cleaned.append(coerced)
+
+    if not cleaned:
+        await db.set_case_law_halacha_status(case_law_id, "completed")
+        return {"status": "no_valid_halachot", "extracted": len(raw_halachot), "stored": 0}
+
+    # Embed rule_statement + reasoning_summary so semantic search hits the
+    # rule directly rather than the surrounding chunk centroid.
+    embed_inputs = [
+        f"{h['rule_statement']} — {h['reasoning_summary']}".strip(" —")
+        for h in cleaned
+    ]
+    try:
+        vectors = await embeddings.embed_texts(embed_inputs, input_type="document")
+    except Exception as e:
+        # Best effort: halachot are still stored, just without an embedding.
+        logger.error("halacha_extractor: embeddings failed: %s", e)
+        vectors = [None] * len(cleaned)
+
+    for halacha, vec in zip(cleaned, vectors):
+        halacha["embedding"] = vec
+
+    stored = await db.store_halachot(case_law_id, cleaned)
+
+    verified = sum(1 for h in cleaned if h["quote_verified"])
+    await db.set_case_law_halacha_status(case_law_id, "completed")
+
+    logger.info(
+        "halacha_extractor: case_law=%s extracted=%d cleaned=%d verified=%d stored=%d",
+        case_law_id, len(raw_halachot), len(cleaned), verified, stored,
+    )
+    return {
+        "status": "completed",
+        "extracted": len(raw_halachot),
+        "valid": len(cleaned),
+        "verified": verified,
+        "stored": stored,
+    }
--- a/mcp-server/src/legal_mcp/services/precedent_library.py
+++ b/mcp-server/src/legal_mcp/services/precedent_library.py
@@ -0,0 +1,309 @@
+"""Orchestrator for the External Precedent Library.
+
+Ingest pipeline (one upload):
+    file → stage a copy under the library directory → extract_text
+        → strip Nevo preamble → INSERT case_law (source_kind='external_upload')
+        → chunk → embed → store precedent_chunks
+        → set extraction_status='completed'
+        → halacha_extractor.extract → embed halachot → store halachot
+
+Progress is reported via a caller-supplied async callback so the
+web layer can pipe updates into the existing Redis ProgressStore /
+SSE plumbing without this module knowing about Redis.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shutil
+from datetime import date
+from pathlib import Path
+from typing import Awaitable, Callable
+from uuid import UUID, uuid4
+
+from legal_mcp import config
+from legal_mcp.services import (
+    chunker,
+    db,
+    embeddings,
+    extractor,
+    halacha_extractor,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# Async progress callback. It is invoked below as
+# ``progress(status, percent, message)``, e.g. ("staging", 5, "...").
+ProgressCb = Callable[[str, int, str], Awaitable[None]]
+
+
+# Root directory for staged copies of uploaded files; _stage_file adds one
+# sub-directory per source type.
+PRECEDENT_LIBRARY_DIR = Path(config.DATA_DIR) / "precedent-library"
+
+
+# Accepted metadata values. The empty string is included in each set, so an
+# unspecified value passes validation.
+_VALID_PRACTICE_AREAS = {"", "rishuy_uvniya", "betterment_levy", "compensation_197"}
+_VALID_SOURCE_TYPES = {"", "court_ruling", "appeals_committee"}
+# NOTE(review): nothing in the visible part of this module reads
+# _VALID_PRECEDENT_LEVELS; ingest_precedent validates practice_area and
+# source_type but not precedent_level — confirm whether that is intended.
+_VALID_PRECEDENT_LEVELS = {
+    "", "עליון", "מנהלי", "ועדת_ערר_ארצית", "ועדת_ערר_מחוזית",
+    "supreme", "administrative", "national_appeals_committee", "district_appeals_committee",
+}
+
+
+async def _noop_progress(_status: str, _percent: int, _msg: str) -> None:
+    """Default progress callback: accept the update and discard it."""
+
+
+def _safe_filename(name: str) -> str:
+    """Reduce a user-provided name to a safe single path component.
+
+    Only the final path component is kept. Every character other than word
+    characters, ``.``, ``-``, ``+``, Hebrew letters and spaces is replaced
+    with ``_``. If nothing is left, a random ``upload-<8 hex>`` name is
+    returned instead.
+    """
+    final_component = Path(name).name
+    sanitized = re.sub(r"[^\w.\-+א-ת ]", "_", final_component)
+    if sanitized:
+        return sanitized
+    return f"upload-{uuid4().hex[:8]}"
+
+
+def _stage_file(src_path: Path, source_type: str) -> Path:
+    """Copy an uploaded file into the library and return the new path.
+
+    The copy lands in ``PRECEDENT_LIBRARY_DIR/<source_type>/`` for the two
+    known source types and in ``.../other/`` for anything else. The target
+    name is the sanitized original name behind a random 8-hex-character
+    prefix. The source file itself is left in place (caller decides).
+    """
+    known_kinds = {"court_ruling", "appeals_committee"}
+    bucket = source_type if source_type in known_kinds else "other"
+
+    target_dir = PRECEDENT_LIBRARY_DIR / bucket
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    cleaned_name = _safe_filename(src_path.name)
+    unique_prefix = uuid4().hex[:8]
+    target = target_dir / f"{unique_prefix}_{cleaned_name}"
+    shutil.copy2(src_path, target)
+    return target
+
+
+def _coerce_date(value) -> date | None:
+    """Best-effort conversion of a caller-supplied value to a ``date``.
+
+    Args:
+        value: ``None``, a ``date``/``datetime``, or an ISO-8601 string.
+            Only the first ten characters of a string (``YYYY-MM-DD``) are
+            parsed, so full timestamps are accepted too.
+
+    Returns:
+        A plain ``date``, or ``None`` when the value is empty, of an
+        unsupported type, or not parseable. Never raises.
+    """
+    # Function-scope import: the module itself only imports ``date``.
+    from datetime import datetime
+
+    if value is None:
+        return None
+    # ``datetime`` subclasses ``date``, so it must be tested first; otherwise
+    # it would be returned as-is and break the ``date`` return contract.
+    if isinstance(value, datetime):
+        return value.date()
+    if isinstance(value, date):
+        return value
+    if isinstance(value, str):
+        # Tolerate surrounding whitespace from form input.
+        candidate = value.strip()[:10]
+        try:
+            return date.fromisoformat(candidate)
+        except ValueError:
+            return None
+    return None
+
+
+async def ingest_precedent(
+    *,
+    file_path: str | Path,
+    citation: str,
+    case_name: str = "",
+    court: str = "",
+    decision_date=None,
+    source_type: str = "",
+    precedent_level: str = "",
+    practice_area: str = "",
+    appeal_subtype: str = "",
+    subject_tags: list[str] | None = None,
+    is_binding: bool = True,
+    headnote: str = "",
+    summary: str = "",
+    document_id: UUID | None = None,
+    progress: ProgressCb | None = None,
+) -> dict:
+    """Ingest a single uploaded precedent through the full pipeline.
+
+    Steps: validate arguments, stage a copy of the file, extract its text,
+    strip any Nevo preamble, create the ``case_law`` row, chunk and embed
+    the text, store the chunks, then run halacha extraction.
+
+    Required: file_path + citation. Everything else has a sensible default.
+
+    Args:
+        file_path: Path of the uploaded file; it is copied, never moved.
+        citation: Citation string, stored as the row's ``case_number``.
+        decision_date: ``date`` or ISO string; unparseable values become
+            ``None``.
+        source_type / practice_area: Must be in the module whitelists
+            (the empty string is allowed).
+        progress: Optional async callback ``(status, percent, message)``.
+
+    Returns:
+        ``{"status": "...", "case_law_id": "...", "chunks": N, "halachot": M}``
+        plus ``halachot_extracted_raw``, ``halachot_verified`` and ``pages``
+        when the text produced at least one chunk.
+
+    Raises:
+        FileNotFoundError: ``file_path`` is not an existing file.
+        ValueError: Blank citation, invalid ``practice_area`` or
+            ``source_type``, or no extractable text in the file.
+        Exception: Any failure from the extraction, embedding or storage
+            steps is re-raised after the failure has been reported.
+    """
+    progress = progress or _noop_progress
+    src = Path(file_path)
+    if not src.is_file():
+        raise FileNotFoundError(f"file not found: {src}")
+    if not citation.strip():
+        raise ValueError("citation is required")
+    if practice_area not in _VALID_PRACTICE_AREAS:
+        raise ValueError(f"invalid practice_area: {practice_area!r}")
+    if source_type not in _VALID_SOURCE_TYPES:
+        raise ValueError(f"invalid source_type: {source_type!r}")
+    # NOTE(review): precedent_level is passed through unvalidated.
+
+    await progress("staging", 5, "מעתיק את הקובץ לאחסון")
+
+    staged = _stage_file(src, source_type)
+
+    await progress("extracting", 15, "מחלץ טקסט מהקובץ")
+    try:
+        text, page_count = await extractor.extract_text(str(staged))
+    except Exception as e:
+        # No database row exists yet, so the staged copy would be orphaned.
+        _discard_staged_copy(staged)
+        await progress("failed", 100, f"כשל בחילוץ טקסט: {e}")
+        raise
+
+    text = (text or "").strip()
+    if not text:
+        _discard_staged_copy(staged)
+        await progress("failed", 100, "לא נמצא טקסט בקובץ")
+        raise ValueError("no extractable text in file")
+
+    # Strip any Nevo preamble that might wrap court rulings downloaded from Nevo.
+    text = extractor.strip_nevo_preamble(text)
+
+    await progress("storing_metadata", 25, "שומר את הפסיקה במסד הנתונים")
+    record = await db.create_external_case_law(
+        case_number=citation.strip(),
+        case_name=case_name.strip() or citation.strip(),
+        full_text=text,
+        court=court.strip(),
+        decision_date=_coerce_date(decision_date),
+        practice_area=practice_area,
+        appeal_subtype=appeal_subtype.strip(),
+        subject_tags=list(subject_tags or []),
+        summary=summary.strip(),
+        headnote=headnote.strip(),
+        source_type=source_type,
+        precedent_level=precedent_level,
+        is_binding=is_binding,
+        document_id=document_id,
+    )
+    case_law_id = UUID(str(record["id"]))
+
+    # From here on a row exists, so failures are recorded on it.
+    try:
+        await progress("chunking", 40, f"מחלק את הטקסט ל-chunks ({page_count} עמ')")
+        chunks = chunker.chunk_document(text)
+        if not chunks:
+            await db.set_case_law_extraction_status(case_law_id, "completed")
+            await db.set_case_law_halacha_status(case_law_id, "completed")
+            await progress("completed", 100, "אין טקסט לעיבוד")
+            return {
+                "status": "completed",
+                "case_law_id": str(case_law_id),
+                "chunks": 0,
+                "halachot": 0,
+            }
+
+        await progress("embedding", 55, f"מייצר embeddings ל-{len(chunks)} chunks")
+        chunk_texts = [c.content for c in chunks]
+        chunk_vectors = await embeddings.embed_texts(chunk_texts, input_type="document")
+        # zip() below would silently drop trailing chunks on a short result.
+        if len(chunk_vectors) != len(chunks):
+            raise RuntimeError(
+                f"embedding count mismatch: got {len(chunk_vectors)} vectors "
+                f"for {len(chunks)} chunks"
+            )
+
+        chunk_dicts = [
+            {
+                "chunk_index": c.chunk_index,
+                "content": c.content,
+                "section_type": c.section_type,
+                "page_number": c.page_number,
+                "embedding": v,
+            }
+            for c, v in zip(chunks, chunk_vectors)
+        ]
+        stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts)
+
+        await progress("extracting_halachot", 75, "מחלץ הלכות מחייבות")
+        # Text extraction is marked done before halacha extraction starts.
+        await db.set_case_law_extraction_status(case_law_id, "completed")
+        halacha_result = await halacha_extractor.extract(case_law_id)
+
+        await progress(
+            "completed",
+            100,
+            f"הוכנס לספרייה: {stored_chunks} chunks, "
+            f"{halacha_result.get('stored', 0)} הלכות ממתינות לאישור",
+        )
+
+        return {
+            "status": "completed",
+            "case_law_id": str(case_law_id),
+            "chunks": stored_chunks,
+            "halachot": halacha_result.get("stored", 0),
+            "halachot_extracted_raw": halacha_result.get("extracted", 0),
+            "halachot_verified": halacha_result.get("verified", 0),
+            "pages": page_count,
+        }
+
+    except Exception as e:
+        logger.exception("precedent_library.ingest_precedent failed: %s", e)
+        await db.set_case_law_extraction_status(case_law_id, "failed")
+        await progress("failed", 100, f"כשל בעיבוד: {e}")
+        raise
+
+
+def _discard_staged_copy(staged: Path) -> None:
+    """Best-effort removal of a staged file that no database row refers to."""
+    try:
+        staged.unlink(missing_ok=True)
+    except OSError as cleanup_err:
+        # Cleanup must never mask the error that triggered it.
+        logger.warning(
+            "precedent_library: could not remove staged file %s: %s",
+            staged, cleanup_err,
+        )
+
+
+async def reextract_halachot(
+    case_law_id: UUID | str,
+    progress: ProgressCb | None = None,
+) -> dict:
+    """Run the halacha extractor again for a precedent that already exists.
+
+    Only rows whose ``source_kind`` is ``'external_upload'`` are accepted.
+    The extractor itself replaces previously stored halachot.
+
+    Returns:
+        The result dict of ``halacha_extractor.extract``.
+
+    Raises:
+        ValueError: The id is not a valid UUID, or the precedent is missing
+            or was not uploaded through the library.
+    """
+    notify = progress or _noop_progress
+    precedent_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
+
+    record = await db.get_case_law(precedent_id)
+    is_external = bool(record) and record.get("source_kind") == "external_upload"
+    if not is_external:
+        raise ValueError("precedent not found or not chair-uploaded")
+
+    await notify("extracting_halachot", 50, "מחלץ הלכות מחדש")
+    outcome = await halacha_extractor.extract(precedent_id)
+    done_message = f"הופקו {outcome.get('stored', 0)} הלכות (ממתינות לאישור)"
+    await notify("completed", 100, done_message)
+    return outcome
+
+
+async def delete_precedent(case_law_id: UUID | str) -> bool:
+    """Delete a precedent; its chunks and halachot are expected to cascade.
+
+    Accepts the id as a ``UUID`` or its string form and returns whatever
+    ``db.delete_case_law`` reports.
+    """
+    precedent_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
+    return await db.delete_case_law(precedent_id)
+
+
+async def get_precedent(case_law_id: UUID | str) -> dict | None:
+    """Fetch one precedent with its halachot under the ``halachot`` key.
+
+    Up to 500 halachot are attached, with no review-status filter applied
+    here. Returns ``None`` when the precedent does not exist.
+    """
+    precedent_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
+    precedent = await db.get_case_law(precedent_id)
+    if precedent:
+        precedent["halachot"] = await db.list_halachot(
+            case_law_id=precedent_id, limit=500,
+        )
+        return precedent
+    return None
+
+
+async def list_precedents(
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    source_type: str = "",
+    search: str = "",
+    limit: int = 100,
+    offset: int = 0,
+) -> list[dict]:
+    """List library precedents, forwarding all filters and paging to the DB layer."""
+    filters = {
+        "practice_area": practice_area,
+        "court": court,
+        "precedent_level": precedent_level,
+        "source_type": source_type,
+        "search": search,
+    }
+    return await db.list_external_case_law(**filters, limit=limit, offset=offset)
+
+
+async def search_library(
+    query: str,
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    appeal_subtype: str = "",
+    is_binding: bool | None = None,
+    subject_tag: str = "",
+    limit: int = 10,
+    include_halachot: bool = True,
+) -> list[dict]:
+    """Semantic search merging halachot (rule-level) and chunks (passage-level).
+
+    The query is embedded once and handed, together with every filter, to
+    ``db.search_precedent_library_semantic``. Per chair-review policy only
+    ``approved`` / ``published`` halachot are meant to be returned, while
+    chunks are returned regardless of halacha review status; that filtering
+    is done by the DB layer, not here.
+
+    Returns:
+        The DB layer's result list, or ``[]`` for a blank query (no
+        embedding call is made in that case).
+    """
+    if not query.strip():
+        return []
+
+    filters = {
+        "practice_area": practice_area,
+        "court": court,
+        "precedent_level": precedent_level,
+        "appeal_subtype": appeal_subtype,
+        "is_binding": is_binding,
+        "subject_tag": subject_tag,
+    }
+    query_embedding = await embeddings.embed_query(query)
+    return await db.search_precedent_library_semantic(
+        query_embedding=query_embedding,
+        **filters,
+        limit=limit,
+        include_halachot=include_halachot,
+    )
--- a/mcp-server/src/legal_mcp/tools/precedent_library.py
+++ b/mcp-server/src/legal_mcp/tools/precedent_library.py
@@ -0,0 +1,234 @@
+"""MCP tools for the External Precedent Library.
+
+This is distinct from:
+
+- ``precedents`` (case_precedents table) — chair-attached quotes scoped to
+  a specific case section. Use ``precedent_search_library`` for that.
+- ``style_corpus`` (Daphna's prior decisions) — searched via
+  ``search_decisions`` for style/voice.
+
+The precedent library is the **authoritative law** corpus: external court
+rulings and other appeals committees' decisions, with halachot extracted
+and reviewed by the chair.
+
+All halachot enter as ``pending_review`` and are invisible to search until
+the chair approves them — per project review policy.
+"""
+
+from __future__ import annotations
+
+import json
+from uuid import UUID
+
+from legal_mcp.services import db, precedent_library
+
+
+def _ok(payload) -> str:
+    """Serialize a successful tool result as pretty-printed JSON.
+
+    Non-ASCII text (Hebrew) is emitted as-is, and values that JSON cannot
+    encode natively are stringified via ``str``.
+    """
+    dump_options = {"ensure_ascii": False, "indent": 2, "default": str}
+    return json.dumps(payload, **dump_options)
+
+
+def _err(msg: str) -> str:
+    """Serialize an error message as a compact ``{"error": ...}`` JSON object."""
+    error_payload = {"error": msg}
+    return json.dumps(error_payload, ensure_ascii=False)
+
+
+async def precedent_library_upload(
+    file_path: str,
+    citation: str,
+    case_name: str = "",
+    court: str = "",
+    decision_date: str = "",
+    source_type: str = "",
+    precedent_level: str = "",
+    practice_area: str = "",
+    appeal_subtype: str = "",
+    subject_tags: list[str] | None = None,
+    is_binding: bool = True,
+    headnote: str = "",
+    summary: str = "",
+) -> str:
+    """Upload an external ruling to the authoritative corpus and extract its halachot.
+
+    Args:
+        file_path: Full path to a PDF/DOCX/RTF/TXT/MD file.
+        citation: Citation of the ruling (required).
+        case_name: Short name.
+        court: Court or tribunal (עליון / מנהלי / ועדת ערר ארצית / ועדת ערר מחוזית).
+        decision_date: ISO date (YYYY-MM-DD), optional.
+        source_type: court_ruling / appeals_committee.
+        precedent_level: עליון / מנהלי / ועדת_ערר_ארצית / ועדת_ערר_מחוזית.
+        practice_area: rishuy_uvniya / betterment_levy / compensation_197.
+        subject_tags: Subject tags (e.g. חניה, קווי_בניין).
+
+    Returns:
+        JSON with the case_law_id, the number of chunks and the number of
+        halachot queued for approval; or a JSON ``{"error": ...}`` object
+        when the citation is blank or ingestion raises.
+    """
+    if not citation.strip():
+        return _err("citation חובה")
+
+    ingest_args = {
+        "file_path": file_path,
+        "citation": citation,
+        "case_name": case_name,
+        "court": court,
+        # An empty string means "no date supplied".
+        "decision_date": decision_date or None,
+        "source_type": source_type,
+        "precedent_level": precedent_level,
+        "practice_area": practice_area,
+        "appeal_subtype": appeal_subtype,
+        "subject_tags": subject_tags or [],
+        "is_binding": is_binding,
+        "headnote": headnote,
+        "summary": summary,
+    }
+    try:
+        outcome = await precedent_library.ingest_precedent(**ingest_args)
+    except Exception as exc:
+        # Tool boundary: report the failure to the caller as JSON.
+        return _err(str(exc))
+    else:
+        return _ok(outcome)
+
+
+async def precedent_library_list(
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    source_type: str = "",
+    search: str = "",
+    limit: int = 100,
+) -> str:
+    """List the rulings in the authoritative corpus, with optional filters, as JSON."""
+    filters = {
+        "practice_area": practice_area,
+        "court": court,
+        "precedent_level": precedent_level,
+        "source_type": source_type,
+        "search": search,
+    }
+    matches = await precedent_library.list_precedents(**filters, limit=limit)
+    return _ok(matches)
+
+
+async def precedent_library_get(case_law_id: str) -> str:
+    """Return one ruling with all of its halachot (pending ones included) as JSON.
+
+    A malformed id or an unknown ruling yields a JSON ``{"error": ...}``
+    object instead.
+    """
+    try:
+        parsed_id = UUID(case_law_id)
+    except ValueError:
+        return _err("case_law_id לא תקין")
+
+    precedent = await precedent_library.get_precedent(parsed_id)
+    if precedent:
+        return _ok(precedent)
+    return _err("פסיקה לא נמצאה")
+
+
+async def precedent_library_delete(case_law_id: str) -> str:
+    """Delete a ruling from the corpus; chunks and halachot cascade.
+
+    Returns JSON ``{"deleted": <bool>, "case_law_id": <id as given>}``, or a
+    JSON error object for a malformed id.
+    """
+    try:
+        parsed_id = UUID(case_law_id)
+    except ValueError:
+        return _err("case_law_id לא תקין")
+
+    was_deleted = await precedent_library.delete_precedent(parsed_id)
+    return _ok({"deleted": was_deleted, "case_law_id": case_law_id})
+
+
+async def precedent_extract_halachot(case_law_id: str) -> str:
+    """Re-run halacha extraction for an existing ruling; previous halachot are deleted.
+
+    Returns the extractor's result as JSON, or a JSON error object when the
+    id is malformed or the re-extraction raises.
+    """
+    try:
+        parsed_id = UUID(case_law_id)
+    except ValueError:
+        return _err("case_law_id לא תקין")
+
+    try:
+        outcome = await precedent_library.reextract_halachot(parsed_id)
+    except Exception as exc:
+        # Tool boundary: report the failure to the caller as JSON.
+        return _err(str(exc))
+    else:
+        return _ok(outcome)
+
+
+async def search_precedent_library(
+    query: str,
+    practice_area: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    appeal_subtype: str = "",
+    is_binding: bool | None = None,
+    subject_tag: str = "",
+    limit: int = 10,
+    include_halachot: bool = True,
+) -> str:
+    """Semantic search over the authoritative case-law corpus.
+
+    Returns mixed results: halachot (rule-level, approved only) and text
+    passages (passage-level). Halachot receive a slight ranking boost
+    because they are pre-distilled.
+
+    Args:
+        query: Search query in Hebrew.
+        practice_area: rishuy_uvniya / betterment_levy / compensation_197.
+        court: Filter by court (substring).
+        precedent_level: עליון / מנהלי / ועדת_ערר_ארצית / ועדת_ערר_מחוזית.
+        appeal_subtype: Filter by appeal sub-type.
+        is_binding: True/False (None = no filter).
+        subject_tag: Filter by subject tag (e.g. "מועד_קביעת_שומה").
+        limit: Maximum number of results.
+        include_halachot: Whether to include halachot (default: yes).
+
+    Returns:
+        JSON ranked list; each item is
+        {"type": "halacha"|"passage", "score", ...}. A query shorter than
+        two characters after trimming yields ``[]``.
+    """
+    trimmed = query.strip() if query else ""
+    if len(trimmed) < 2:
+        return json.dumps([], ensure_ascii=False)
+
+    filters = {
+        "practice_area": practice_area,
+        "court": court,
+        "precedent_level": precedent_level,
+        "appeal_subtype": appeal_subtype,
+        "is_binding": is_binding,
+        "subject_tag": subject_tag,
+    }
+    hits = await precedent_library.search_library(
+        query=trimmed,
+        **filters,
+        limit=limit,
+        include_halachot=include_halachot,
+    )
+    return _ok(hits)
+
+
+async def halacha_review(
+    halacha_id: str,
+    status: str,
+    reviewer: str = "דפנה",
+    rule_statement: str = "",
+    reasoning_summary: str = "",
+    subject_tags: list[str] | None = None,
+    practice_areas: list[str] | None = None,
+) -> str:
+    """Approve, reject or edit an automatically extracted halacha.
+
+    Args:
+        halacha_id: Id of the halacha.
+        status: pending_review / approved / rejected / published.
+        reviewer: Name of the reviewer (the default is the chair's name).
+        rule_statement: Edited rule wording (empty = unchanged).
+        reasoning_summary: Edited reasoning summary (empty = unchanged).
+        subject_tags: Edited tags (None = unchanged).
+        practice_areas: Edited practice areas (None = unchanged).
+
+    Returns:
+        The updated row as JSON, or a JSON error object for an invalid
+        status, a malformed id, or an unknown halacha.
+    """
+    allowed_statuses = {"pending_review", "approved", "rejected", "published"}
+    if status not in allowed_statuses:
+        message = (
+            "status לא חוקי. ערכים תקינים: "
+            "pending_review / approved / rejected / published"
+        )
+        return _err(message)
+
+    try:
+        parsed_id = UUID(halacha_id)
+    except ValueError:
+        return _err("halacha_id לא תקין")
+
+    updated = await db.update_halacha(
+        halacha_id=parsed_id,
+        review_status=status,
+        reviewer=reviewer,
+        # Empty text edits are passed as None, i.e. "leave unchanged".
+        rule_statement=rule_statement or None,
+        reasoning_summary=reasoning_summary or None,
+        subject_tags=subject_tags,
+        practice_areas=practice_areas,
+    )
+    return _err("הלכה לא נמצאה") if updated is None else _ok(updated)
+
+
+async def halachot_pending(limit: int = 100) -> str:
+    """Return the queue of halachot awaiting approval (review_status='pending_review') as JSON."""
+    queue = await db.list_halachot(review_status="pending_review", limit=limit)
+    return _ok(queue)