feat: #34 citation graph + #32 wide-modal precedent edit + #13 verify

## #34 — Daphna's internal citation graph

New schema V16 (V15 was already used by proceeding_type): table ``precedent_internal_citations`` (source→cited, with cited_case_law_id nullable for citations whose target isn't in the corpus yet) + 3 indexes (source, target, unlinked).

New service ``citation_extractor.py`` with regex patterns for ערר / בל"מ / עע"מ / בר"מ / עמ"נ / ע"א / בג"ץ / רע"א / דנ"א / בש"א — accepts both ``/`` and ``-`` separators. The district label between the prefix and the number is optional, but when present it must be wrapped in real parentheses, to avoid greedy mid-paragraph captures. Resolves citations against ``case_law.case_number`` by substring match; default confidence 0.90 linked, 0.75 unlinked. ON CONFLICT DO NOTHING on (source, cited_case_number).

3 new MCP tools: ``extract_internal_citations``, ``list_internal_citations``, ``list_incoming_citations``. Optional flag ``include_cited_by=True`` on ``search_internal_decisions`` appends cited-by candidates as ``match_type='cited_by'`` stubs.

Bulk-extracted from 40 internal_committee rows authored by דפנה תמיר: **353 distinct citations, 348 stored, 96 linked / 252 unlinked**. Top citers: 1079/24 (30), 1024/24 (19), 1009/25 (18). Top unlinked target: ע"א 3213/97 (cited 5x) — natural #35 candidates.

## #32 — Wide-modal precedent edit

`precedent-edit-sheet.tsx`: ``<Sheet side="left">`` → centered ``<Dialog>`` with ``sm:max-w-4xl`` ``max-h-[90vh]`` ``overflow-y-auto``. Component API unchanged so existing callers (`/precedents/[id]/page.tsx`, `library-list-panel.tsx`) work as-is. RTL preserved. Mobile falls back to near-full-width via shadcn default.

## #13 — 403/17 verification

`case_law e151fc25-...` (אהרון ברק - תכנית רחביה) already in perfect shape after Stage A work: all metadata fields populated, 351 halachot with avg_conf=0.864 (well above 0.78 threshold). No re-extraction needed; closing task as verified.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 10:37:53 +00:00
parent 9f4f8c60a4
commit 7ad995aade
6 changed files with 797 additions and 33 deletions
--- a/mcp-server/src/legal_mcp/server.py
+++ b/mcp-server/src/legal_mcp/server.py
@@ -56,6 +56,7 @@ from legal_mcp.tools import (  # noqa: E402
    internal_decisions as int_tools,
    legal_arguments as la_tools,
    missing_precedents as mp_tools,
+    citations as cit_tools,
 )


@@ -447,6 +448,7 @@ async def search_internal_decisions(
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
+    include_cited_by: bool = False,
 ) -> str:
    """חיפוש בהחלטות ועדות ערר לתכנון ובנייה (כל המחוזות).

@@ -461,9 +463,13 @@ async def search_internal_decisions(
        chair_name: שם יו"ר הוועדה לסינון. ריק = כל היו"רים
        limit: מספר תוצאות מקסימלי
        include_halachot: האם לכלול הלכות שחולצו
+        include_cited_by: True = הוסף תוצאות עקיפות — לכל hit הוסף גם החלטות
+            שהוא מצטט (מתוך citation graph). שימושי לחיפוש "כל הקשור ל-X"
+            כשרוצים להרחיב מעבר לטקסט המקורי. default False.
    """
    return await search.search_internal_decisions(
        query, practice_area, appeal_subtype, district, chair_name, limit, include_halachot,
+        include_cited_by=include_cited_by,
    )


@@ -803,6 +809,67 @@ async def missing_precedent_close(
    )


+# ── Internal citations graph (TaskMaster #34) ─────────────────────
+
+
+# MCP tool wrapper: forwards its three arguments unchanged to
+# ``cit_tools.extract_internal_citations`` and returns that coroutine's string
+# result. The delegate answers with an error JSON when both ``case_law_id``
+# and ``chair_name`` are non-blank.
+#
+# English gist of the docstring below: extract internal citations from
+# appeals-committee decisions and store them in the citation graph. With
+# ``case_law_id`` it runs on one row; with ``chair_name`` on every decision of
+# that chair; with both empty on the whole internal_committee corpus. Re-runs
+# do not create duplicates; citations whose target is not in the corpus are
+# kept unlinked (``cited_case_law_id = NULL``).
+#
+# NOTE(review): the Hebrew docstring is presumably what the MCP framework
+# publishes as the tool description, so it is left untouched — confirm.
+@mcp.tool()
+async def extract_internal_citations(
+    case_law_id: str = "",
+    chair_name: str = "",
+    limit: int = 0,
+) -> str:
+    """חילוץ ציטוטים פנימיים מהחלטות ועדת ערר ושמירה ב-citation graph.
+
+    משתמש בדפוסי regex עבריים ("ונפנה ל…", "כפי שקבעתי…", "ראה החלטתי…")
+    לזיהוי הפניות בין החלטות. אם case_law_id סופק — מריץ על שורה אחת
+    (שימושי אחרי upload). אם chair_name סופק — מריץ על כל ההחלטות של
+    אותו יו"ר. אם שניהם ריקים — מריץ על כל ה-internal_committee corpus.
+
+    איידמפוטנטי: ניתן להריץ שוב ושוב בלי כפילויות. ציטוטים שמופנים
+    להחלטות שעדיין לא בקורפוס נשמרים כ-unlinked (cited_case_law_id=NULL)
+    ויראו ב-list_internal_citations כשהיו"ר יחליט אם להעלות אותן.
+    """
+    return await cit_tools.extract_internal_citations(
+        case_law_id=case_law_id,
+        chair_name=chair_name,
+        limit=limit,
+    )
+
+
+# MCP tool wrapper: forwards its arguments unchanged to
+# ``cit_tools.list_internal_citations`` and returns that coroutine's string
+# result. The delegate rejects a blank or malformed ``case_law_id`` with an
+# error JSON.
+#
+# English gist of the docstring below: list the outgoing citations of a
+# decision (what the decision cites), giving a picture of the case law it
+# relied on. ``linked_only=True`` keeps only citations that were matched to a
+# ``case_law`` row in the corpus.
+#
+# NOTE(review): the Hebrew docstring is presumably what the MCP framework
+# publishes as the tool description, so it is left untouched — confirm.
+@mcp.tool()
+async def list_internal_citations(
+    case_law_id: str = "",
+    linked_only: bool = False,
+    limit: int = 50,
+) -> str:
+    """רשימת ציטוטים יוצאים מהחלטה (מה ההחלטה מצטטת).
+
+    משתמש לקבלת תמונה של בסיס הפסיקה שהחלטה הסתמכה עליו.
+    linked_only=True מסנן רק ציטוטים שזוהו ב-case_law של הקורפוס.
+    """
+    return await cit_tools.list_internal_citations(
+        case_law_id=case_law_id,
+        linked_only=linked_only,
+        limit=limit,
+    )
+
+
+# MCP tool wrapper: forwards its arguments unchanged to
+# ``cit_tools.list_incoming_citations`` and returns that coroutine's string
+# result. The delegate rejects a blank or malformed ``case_law_id`` with an
+# error JSON.
+#
+# English gist of the docstring below: list the incoming citations of a
+# decision (which decisions cite it). Pass the ``case_law_id`` of a ruling to
+# find the decisions, by Daphna or by other committees, that relied on it.
+#
+# NOTE(review): the Hebrew docstring is presumably what the MCP framework
+# publishes as the tool description, so it is left untouched — confirm.
+@mcp.tool()
+async def list_incoming_citations(
+    case_law_id: str = "",
+    limit: int = 50,
+) -> str:
+    """רשימת ציטוטים נכנסים אל החלטה (אילו החלטות מצטטות אותה).
+
+    שימוש: רוצים לדעת אילו החלטות של דפנה (או של ועדות אחרות) הסתמכו
+    על פסק דין מסוים — מעבירים את ה-case_law_id של פסק הדין.
+    """
+    return await cit_tools.list_incoming_citations(
+        case_law_id=case_law_id,
+        limit=limit,
+    )
+
+
@mcp.tool()
 async def record_chair_feedback(
    case_number: str,
--- a/mcp-server/src/legal_mcp/services/citation_extractor.py
+++ b/mcp-server/src/legal_mcp/services/citation_extractor.py
@@ -0,0 +1,434 @@
+"""Internal citation graph extractor (TaskMaster #34).
+
+When Daphna (or any other internal_committee chair) cites another committee
+decision inside the body of a ruling, she uses fairly stable phrases:
+
+    "ונפנה לערר 1110/20 ירושלים שקופה …"
+    "כפי שקבעתי בערר 1041/24 …"
+    "בדומה לעמדתי בהחלטה ערר 8048/24 …"
+    "כפי שנקבע במחוז ת\"א בערר 1234/20 …"
+    "ראה החלטתי בערר 1015-01-24 …"
+
+This module scans the ``full_text`` of internal-committee ``case_law`` rows,
+extracts those citations via regex, tries to link each cited case_number to a
+row already in ``case_law`` (any source_kind), and stores the result in
+``precedent_internal_citations``. Unresolved citations are kept with
+``cited_case_law_id = NULL`` so the chair can see what's missing from the
+corpus (and ``search_internal_decisions`` can surface "cited but absent" gaps).
+
+The result is a *citation graph* that downstream tools (search, researcher
+agent) can join on to surface "decisions cited by this one" alongside
+keyword/semantic hits — without re-running an LLM on every query.
+
+Patterns are *intentionally* permissive: we accept Hebrew gershayim written
+either straight (``"``) or curly (``״``), an optional district parenthetical,
+and several optional trigger phrases. Repeated mentions of the same case are
+collapsed by case-number normalization (see ``_normalize_case_number``) and,
+at insert time, by the ``UNIQUE (source_case_law_id, cited_case_number)``
+constraint; neither step filters out genuine false positives.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import Iterator
+from uuid import UUID
+
+from legal_mcp.services import db
+
+logger = logging.getLogger(__name__)
+
+
+# ── Patterns ─────────────────────────────────────────────────────────
+#
+# Two pattern families:
+#   1. Appeals-committee citations ("ערר" / "בל\"מ") — primary target.
+#      These are the ones we mainly expect to resolve against ``case_law``.
+#   2. Court rulings ("עע\"מ", "בר\"מ", "עמ\"נ", "ע\"א", "בג\"ץ", "רע\"א",
+#      "דנ\"א", "בש\"א"). Resolved the same way; when no ``case_law`` row
+#      matches they are stored as unlinked rows, so the researcher still
+#      sees that the decision quotes a higher court.
+#
+# Trigger words ("ונפנה", "כפי שקבעתי", "בדומה ל…", "ראה החלטתי",
+# "כפי שנקבע") are *optional* — many citations appear without one (Daphna
+# often introduces a quote with just "כפי שצוין בערר…"). We therefore
+# match the citation core (prefix + number) and capture the surrounding
+# sentence as context.
+#
+# Regex notes:
+#   * Hebrew gershayim/quotation: both straight (") and curly (״) are
+#     accepted via the character class [\"״].
+#   * Case numbers can be NNNN/YY, NNNN-YY, or NNNN-MM-YY (the third form
+#     is the Nevo "filed" format: 1015-01-24 means file #1015 of Jan 2024).
+#   * Optional district paren: ערר (ועדות ערר - תכנון ובנייה ירושלים)
+#     1110/20 — we allow up to 80 chars of parenthetical content.
+#   * \b doesn't behave well with Hebrew, so we anchor by whitespace or
+#     punctuation lookarounds.
+
+# Optional lead-in phrase. The whole group is non-capturing and ends in ``?``,
+# so a citation with no trigger phrase still matches and the capture-group
+# numbering of the patterns below is unaffected.
+_TRIGGER = (
+    r"(?:ונפנה\s+ל|"
+    r"כפי\s+ש(?:קבעתי|נקבע|פסקתי)\s+ב|"
+    r"בדומה\s+ל(?:עמדתי\s+ב)?|"
+    r"ראה\s+(?:את\s+)?(?:החלטתי\s+ב|פסיקת\s+ה?ועדה\s+ב)?|"
+    r"בעניין\s+|"
+    r"בהחלטת(?:י|ה|נו)?\s+ב?)?"
+)
+
+# Optional district / committee parenthetical between the prefix and the
+# case number. Matches things like "(ועדות ערר - תכנון ובנייה ירושלים)"
+# or "(ירושלים)" or "(מרכז)", with at most 80 characters and no newline
+# inside. The group as a whole is optional, but when it is present the
+# literal `\(` and `\)` are required — otherwise the regex greedily absorbs
+# the next sentence's content and skips intermediate citations like
+# "ראה גם ערר 1041/24 …\nכפי שקבעתי בערר (…) 1110/20".
+_DISTRICT_PAREN = r"(?:\s*\([^)\n]{0,80}\)\s*)?"
+
+# Case-number core (one capture group): 3-5 digits, then optionally a
+# separator and 2-4 digits, then optionally a second separator and 2-4 digits
+# (the NNNN-MM-YY format). Because the separator part is optional, a bare
+# 3-5 digit number following a prefix also matches.
+_NUM_RX = r"(\d{3,5}(?:[-/]\d{2,4}(?:[-/]\d{2,4})?)?)"
+
+# Each entry is ``(pattern_kind, compiled_regex)``. In both regexes group 1 is
+# the citation prefix and group 2 is the case number.
+_PATTERNS = [
+    # 1. Appeals-committee — ערר / בל"מ
+    (
+        "appeals_committee",
+        re.compile(
+            _TRIGGER
+            + r"(ערר|בל[\"״]מ)"
+            + _DISTRICT_PAREN
+            + r"\s*"
+            + _NUM_RX,
+            re.UNICODE,
+        ),
+    ),
+    # 2. Higher courts — עע"מ, בר"מ, עמ"נ, ע"א, בג"ץ, רע"א, דנ"א, בש"א
+    #    (no district parenthetical is accepted for this family)
+    (
+        "court_ruling",
+        re.compile(
+            _TRIGGER
+            + r"(עע[\"״]מ|בר[\"״]מ|עמ[\"״]נ|ע[\"״]א|בג[\"״]ץ|רע[\"״]א|דנ[\"״]א|בש[\"״]א)"
+            + r"\s*"
+            + _NUM_RX,
+            re.UNICODE,
+        ),
+    ),
+]
+
+
+# Context window stored with each match: number of characters taken before
+# the match start and after the match end.
+_CTX_BEFORE = 120
+_CTX_AFTER = 240
+
+
+def _normalize_case_number(raw: str) -> str:
+    """Reduce a case-number string to a canonical, match-only form.
+
+    The same case may be written "1110/20", "1110-20" or "ערר 1110/20", and
+    Nevo file numbers look like "1015-01-24". To compare them we:
+      * keep only digits and the two separators ``/`` and ``-``
+      * rewrite every ``/`` as ``-``
+      * drop separators left dangling at either end
+
+    A falsy ``raw`` (empty string or ``None``) yields ``""``. The result is
+    meant for matching only, never for display.
+    """
+    kept_chars = re.findall(r"[\d/\-]", raw or "")
+    dashed = "".join(kept_chars).replace("/", "-")
+    return dashed.strip("-")
+
+
+def extract_citations_from_text(text: str) -> Iterator[dict]:
+    """Yield one dict per distinct citation found in ``text``.
+
+    Every pattern in ``_PATTERNS`` is scanned over the whole text. A citation
+    is identified by its pattern kind, its prefix and its normalized number,
+    so two different proceedings that happen to share a number (for example
+    ע"א 100/20 and בג"ץ 100/20) are both reported, while repeated mentions of
+    the same one are reported once (first occurrence wins).
+
+    Each dict has:
+        prefix: matched prefix as written (ערר / בל\"מ / עע\"מ / …)
+        case_number: raw number as captured
+        case_number_norm: output of ``_normalize_case_number``
+        raw: the full matched span, stripped
+        context: the text from ``_CTX_BEFORE`` characters before the match
+            to ``_CTX_AFTER`` characters after it, whitespace collapsed
+        pattern_kind: 'appeals_committee' or 'court_ruling'
+
+    An empty or ``None`` text yields nothing.
+    """
+    if not text:
+        return
+    seen: set[tuple[str, str, str]] = set()
+    for kind, pattern in _PATTERNS:
+        for m in pattern.finditer(text):
+            # ``_TRIGGER`` is non-capturing, so group(1) is the prefix and
+            # group(2) is the number.
+            prefix = (m.group(1) or "").strip()
+            number = (m.group(2) or "").strip()
+            if not prefix or not number:
+                continue
+            norm = _normalize_case_number(number)
+            if not norm:
+                continue
+            # Fold curly gershayim to straight ones so both spellings of one
+            # prefix share a key; keep the prefix in the key so different
+            # courts with the same number are not merged.
+            key = (kind, prefix.replace("״", '"'), norm)
+            if key in seen:
+                continue
+            seen.add(key)
+
+            start = max(0, m.start() - _CTX_BEFORE)
+            end = min(len(text), m.end() + _CTX_AFTER)
+            # Collapsing all whitespace runs also covers newlines.
+            context = re.sub(r"\s+", " ", text[start:end]).strip()
+
+            yield {
+                "prefix": prefix,
+                "case_number": number,
+                "case_number_norm": norm,
+                "raw": m.group(0).strip(),
+                "context": context[:1000],
+                "pattern_kind": kind,
+            }
+
+
+async def _resolve_case_law_id(case_number_norm: str) -> UUID | None:
+    """Try to resolve a normalized citation to an existing case_law row.
+
+    The corpus often stores the full Nevo header in ``case_number``
+    ("ערר ‏(‏ועדות ערר - תכנון ובנייה ירושלים‏)‏ 1110/20 …"), so the number is
+    searched *inside* the column rather than compared for equality. The
+    search is anchored on non-digit boundaries, so "110/20" does not link to
+    "1110/20" and "1110/20" does not link to "1110/2019".
+
+    Two spellings are tried: the dashed form as given, and a slash form
+    (``NNNN/YY``). For a three-part number the slash form is first/last part.
+    NOTE(review): that lets "1015-01-24" match a row numbered "1015/24";
+    confirm this cross-format match is intended.
+
+    When several rows match, internal-committee rows win, then the shortest
+    ``case_number``.
+
+    Args:
+        case_number_norm: output of ``_normalize_case_number`` (ASCII digits
+            and dashes only). Anything else is treated as unresolvable.
+
+    Returns:
+        The matching row id, or ``None`` when nothing matches.
+    """
+    # The value is spliced into a regex below; accept only the characters the
+    # normalizer can produce so no regex metacharacter ever reaches the query.
+    if not case_number_norm or not re.fullmatch(r"[0-9-]+", case_number_norm):
+        return None
+
+    parts = case_number_norm.split("-")
+    if len(parts) == 2:
+        slash_form = "/".join(parts)
+    elif len(parts) > 2:
+        slash_form = f"{parts[0]}/{parts[-1]}"
+    else:
+        slash_form = case_number_norm
+    alternatives = sorted({slash_form, case_number_norm})
+    # (^|non-digit) NUMBER (non-digit|$) — POSIX regex, matched with ``~``.
+    bounded_rx = r"(^|[^0-9])(" + "|".join(alternatives) + r")([^0-9]|$)"
+
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        row = await conn.fetchrow(
+            """
+            SELECT id FROM case_law
+             WHERE case_number ~ $1
+             ORDER BY (source_kind = 'internal_committee') DESC,
+                      LENGTH(case_number) ASC
+             LIMIT 1
+            """,
+            bounded_rx,
+        )
+    return UUID(str(row["id"])) if row else None
+
+
+async def extract_and_store(case_law_id: UUID) -> dict:
+    """Extract citations from one ``case_law`` row and persist them.
+
+    Reads the row's ``full_text``, extracts citations with
+    ``extract_citations_from_text``, resolves each one against the corpus and
+    inserts it into ``precedent_internal_citations``. A citation that is
+    already stored is left alone, except that a stored *unlinked* row is
+    upgraded in place when its target now resolves — which is what makes a
+    re-run after uploading a previously missing decision useful.
+
+    Args:
+        case_law_id: id of the source ``case_law`` row.
+
+    Returns:
+        ``{extracted, linked, new, skipped, relinked}`` where
+            extracted — distinct citations found in the text
+            linked    — citations that resolved to an existing case_law row
+            new       — rows actually inserted by this call
+            skipped   — self-citations plus citations already stored
+            relinked  — already-stored unlinked rows that gained a target
+                        (these are also counted in ``skipped``)
+        When the source row does not exist, the four original counters are
+        zero and an ``"error": "not_found"`` entry is present instead.
+    """
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        row = await conn.fetchrow(
+            "SELECT id, case_number, full_text FROM case_law WHERE id = $1",
+            case_law_id,
+        )
+    if not row:
+        return {"extracted": 0, "linked": 0, "new": 0, "skipped": 0, "error": "not_found"}
+
+    text = row["full_text"] or ""
+    own_norm = _normalize_case_number(row["case_number"] or "")
+
+    extracted = 0
+    linked = 0
+    new_count = 0
+    skipped = 0
+    relinked = 0
+
+    # Phase 1 — resolve. ``_resolve_case_law_id`` acquires its own pool
+    # connection, so nothing is held here while it runs.
+    pending: list[tuple[dict, UUID | None]] = []
+    for cit in extract_citations_from_text(text):
+        extracted += 1
+        if cit["case_number_norm"] == own_norm:
+            # Self-citation (e.g. document headers repeating the case number).
+            skipped += 1
+            continue
+
+        cited_id = await _resolve_case_law_id(cit["case_number_norm"])
+        if cited_id == case_law_id:
+            # The number resolved back to the source row itself.
+            skipped += 1
+            continue
+        if cited_id is not None:
+            linked += 1
+        pending.append((cit, cited_id))
+
+    # Phase 2 — write everything on one connection instead of acquiring a
+    # fresh one per citation.
+    if pending:
+        async with pool.acquire() as conn:
+            for cit, cited_id in pending:
+                cited_label = f"{cit['prefix']} {cit['case_number']}"
+                confidence = 0.90 if cited_id is not None else 0.75
+                # RETURNING yields no row when the conflict clause fires, so
+                # ``None`` means "already stored".
+                inserted_id = await conn.fetchval(
+                    """
+                    INSERT INTO precedent_internal_citations (
+                        source_case_law_id, cited_case_number, cited_case_law_id,
+                        match_context, match_pattern, confidence
+                    )
+                    VALUES ($1, $2, $3, $4, $5, $6)
+                    ON CONFLICT (source_case_law_id, cited_case_number) DO NOTHING
+                    RETURNING id
+                    """,
+                    case_law_id,
+                    cited_label,
+                    cited_id,
+                    cit["context"],
+                    cit["pattern_kind"],
+                    confidence,
+                )
+                if inserted_id is not None:
+                    new_count += 1
+                    continue
+
+                skipped += 1
+                if cited_id is None:
+                    continue
+                # Already stored: fill in the target if the stored row is
+                # still unlinked. Rows that already have a target are never
+                # overwritten.
+                upgraded_id = await conn.fetchval(
+                    """
+                    UPDATE precedent_internal_citations
+                       SET cited_case_law_id = $3,
+                           confidence = $4
+                     WHERE source_case_law_id = $1
+                       AND cited_case_number = $2
+                       AND cited_case_law_id IS NULL
+                    RETURNING id
+                    """,
+                    case_law_id,
+                    cited_label,
+                    cited_id,
+                    confidence,
+                )
+                if upgraded_id is not None:
+                    relinked += 1
+
+    return {
+        "extracted": extracted,
+        "linked": linked,
+        "new": new_count,
+        "skipped": skipped,
+        "relinked": relinked,
+    }
+
+
+async def extract_all_internal_committee(
+    chair_name_filter: str = "",
+    limit: int = 0,
+) -> dict:
+    """Run ``extract_and_store`` over internal-committee rows of ``case_law``.
+
+    Rows are taken in ``created_at`` order and must have a non-empty
+    ``full_text``. A row whose extraction raises is logged and counted under
+    ``failed``; the batch carries on with the next row.
+
+    Args:
+        chair_name_filter: when non-empty, only rows whose ``chair_name``
+            equals this value are processed.
+        limit: maximum number of rows to process; 0 (or negative) = no cap.
+
+    Returns:
+        Aggregate totals: ``processed``, ``extracted``, ``linked``, ``new``,
+        ``skipped``, ``failed``, plus the ``chair_name_filter`` used and the
+        ``row_count`` selected.
+    """
+    predicates = ["source_kind = 'internal_committee'", "full_text <> ''"]
+    query_args: list = []
+    if chair_name_filter:
+        predicates.append("chair_name = $1")
+        query_args.append(chair_name_filter)
+
+    sql = (
+        "SELECT id, case_number FROM case_law"
+        + " WHERE "
+        + " AND ".join(predicates)
+        + " ORDER BY created_at"
+    )
+    if limit and limit > 0:
+        # int() keeps the interpolated value strictly numeric.
+        sql += f" LIMIT {int(limit)}"
+
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(sql, *query_args)
+
+    totals = dict.fromkeys(
+        ("processed", "extracted", "linked", "new", "skipped", "failed"), 0
+    )
+    totals["chair_name_filter"] = chair_name_filter
+    totals["row_count"] = len(rows)
+
+    for r in rows:
+        try:
+            stats = await extract_and_store(UUID(str(r["id"])))
+            totals["processed"] += 1
+            for counter in ("extracted", "linked", "new", "skipped"):
+                totals[counter] += stats.get(counter, 0)
+        except Exception as e:
+            logger.exception("citation extraction failed for %s: %s", r["case_number"], e)
+            totals["failed"] += 1
+
+    return totals
+
+
+async def list_citations_for_case_law(
+    case_law_id: UUID,
+    linked_only: bool = False,
+) -> list[dict]:
+    """Return the outgoing edges of ``case_law_id``, oldest first.
+
+    Each citation row is LEFT JOINed to its target ``case_law`` row, so the
+    ``target_*`` fields are NULL for unlinked citations.
+
+    Args:
+        case_law_id: id of the citing (source) row.
+        linked_only: when True, keep only citations that have a target row.
+
+    Returns:
+        One plain dict per citation.
+    """
+    linked_clause = " AND pic.cited_case_law_id IS NOT NULL" if linked_only else ""
+    query = f"""
+        SELECT pic.id::text AS id,
+               pic.cited_case_number,
+               pic.cited_case_law_id::text AS cited_case_law_id,
+               pic.match_context,
+               pic.match_pattern,
+               pic.confidence::float AS confidence,
+               pic.created_at,
+               cl.case_number AS target_case_number,
+               cl.case_name AS target_case_name,
+               cl.chair_name AS target_chair_name,
+               cl.district AS target_district
+          FROM precedent_internal_citations pic
+          LEFT JOIN case_law cl ON cl.id = pic.cited_case_law_id
+         WHERE pic.source_case_law_id = $1{linked_clause}
+         ORDER BY pic.created_at
+    """
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        records = await conn.fetch(query, case_law_id)
+    return list(map(dict, records))
+
+
+async def list_citations_to_case_law(case_law_id: UUID) -> list[dict]:
+    """Return the incoming edges of ``case_law_id``, newest first.
+
+    Answers "which decisions cite this ruling?". Each citation row is joined
+    to its *source* ``case_law`` row, exposed through the ``source_*`` fields.
+
+    Args:
+        case_law_id: id of the cited (target) row.
+
+    Returns:
+        One plain dict per citing row.
+    """
+    query = """
+        SELECT pic.id::text AS id,
+               pic.source_case_law_id::text AS source_case_law_id,
+               pic.cited_case_number,
+               pic.match_context,
+               pic.match_pattern,
+               pic.confidence::float AS confidence,
+               pic.created_at,
+               cl.case_number AS source_case_number,
+               cl.case_name AS source_case_name,
+               cl.chair_name AS source_chair_name,
+               cl.district AS source_district
+          FROM precedent_internal_citations pic
+          JOIN case_law cl ON cl.id = pic.source_case_law_id
+         WHERE pic.cited_case_law_id = $1
+         ORDER BY pic.created_at DESC
+    """
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        records = await conn.fetch(query, case_law_id)
+    return list(map(dict, records))
+
+
+async def get_cited_case_law_ids(source_case_law_ids: list[UUID]) -> dict[str, list[str]]:
+    """Fetch, in one query, the linked targets cited by each source row.
+
+    Only resolved citations (non-NULL ``cited_case_law_id``) are included;
+    a source with no linked citations does not appear in the result.
+
+    Args:
+        source_case_law_ids: ids of the citing rows. An empty list returns
+            ``{}`` without touching the database.
+
+    Returns:
+        ``{source_case_law_id (str): [cited_case_law_id (str), ...]}``.
+    """
+    if not source_case_law_ids:
+        return {}
+
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        edges = await conn.fetch(
+            """
+            SELECT source_case_law_id::text AS source_id,
+                   cited_case_law_id::text AS cited_id
+              FROM precedent_internal_citations
+             WHERE source_case_law_id = ANY($1::uuid[])
+               AND cited_case_law_id IS NOT NULL
+            """,
+            list(source_case_law_ids),
+        )
+
+    grouped: dict[str, list[str]] = {}
+    for edge in edges:
+        source_id = edge["source_id"]
+        if source_id not in grouped:
+            grouped[source_id] = []
+        grouped[source_id].append(edge["cited_id"])
+    return grouped
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -875,6 +875,36 @@ CREATE UNIQUE INDEX IF NOT EXISTS uq_cases_number_proc
 """


+# ── V16: Internal citations graph (TaskMaster #34) ────────────────
+# Auto-extracted citation graph between Daphna's (and other internal_committee)
+# decisions. When an internal decision cites another committee decision in a
+# patterned way ("ונפנה ל…", "כפי שקבעתי…", "ראה החלטתי…"), the citation
+# extractor records the link here. ``cited_case_law_id`` is populated when the
+# cited case_number resolves to a row in ``case_law``; otherwise it stays NULL
+# and shows up in ``idx_pic_unlinked`` so the chair can decide whether to
+# upload the missing decision.
+#
+# Column notes:
+#   * source_case_law_id — the citing row; edges are deleted with it (CASCADE).
+#   * cited_case_law_id  — the cited row; reset to NULL if that row is deleted,
+#     so the edge survives as an unlinked citation.
+#   * cited_case_number  — citation text as stored by the extractor; together
+#     with the source it forms the uniqueness key that ON CONFLICT relies on.
+#   * confidence         — column default is 0.85, but the extractor always
+#     writes an explicit value (0.90 linked / 0.75 unlinked).
+# Every statement is IF NOT EXISTS, so re-running the migration is harmless.
+# NOTE(review): the UNIQUE constraint presumably already provides an index
+# led by source_case_law_id, which would make idx_pic_source redundant —
+# confirm before dropping it.
+SCHEMA_V16_SQL = """
+CREATE TABLE IF NOT EXISTS precedent_internal_citations (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    source_case_law_id UUID NOT NULL REFERENCES case_law(id) ON DELETE CASCADE,
+    cited_case_number TEXT NOT NULL,
+    cited_case_law_id UUID REFERENCES case_law(id) ON DELETE SET NULL,
+    match_context TEXT,
+    match_pattern TEXT,
+    confidence NUMERIC(3,2) DEFAULT 0.85,
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    UNIQUE (source_case_law_id, cited_case_number)
+);
+CREATE INDEX IF NOT EXISTS idx_pic_source
+    ON precedent_internal_citations(source_case_law_id);
+CREATE INDEX IF NOT EXISTS idx_pic_target
+    ON precedent_internal_citations(cited_case_law_id);
+CREATE INDEX IF NOT EXISTS idx_pic_unlinked
+    ON precedent_internal_citations(cited_case_number)
+    WHERE cited_case_law_id IS NULL;
+"""
+
+
 async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
    async with pool.acquire() as conn:
        await conn.execute(SCHEMA_SQL)
@@ -893,7 +923,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
        await conn.execute(SCHEMA_V13_SQL)
        await conn.execute(SCHEMA_V14_SQL)
        await conn.execute(SCHEMA_V15_SQL)
-    logger.info("Database schema initialized (v1-v15)")
+        await conn.execute(SCHEMA_V16_SQL)
+    logger.info("Database schema initialized (v1-v16)")


 async def init_schema() -> None:
--- a/mcp-server/src/legal_mcp/tools/citations.py
+++ b/mcp-server/src/legal_mcp/tools/citations.py
@@ -0,0 +1,135 @@
+"""MCP tools for the internal-decisions citation graph (TaskMaster #34).
+
+The citation graph captures pointers between Daphna's (and other internal
+committee chairs') decisions: when one ruling cites another, ``precedent_
+internal_citations`` records the edge — resolved against ``case_law`` when
+the cited row exists, kept as a stub when it doesn't.
+
+Three tools:
+
+- ``extract_internal_citations`` — run regex extraction on one row (by id) or
+  on every internal-committee row filtered by chair (e.g. Daphna only).
+  Idempotent: re-running does not duplicate rows (ON CONFLICT DO NOTHING).
+- ``list_internal_citations`` — outgoing edges from a source row. Optional
+  ``linked_only`` filter for rows resolved to existing case_law UUIDs.
+- ``list_incoming_citations`` — incoming edges to a target row ("which
+  Daphna decisions cite this ruling?").
+
+These tools are *manual triggers*. The pipeline runs them after a new
+internal-decision upload, but the chair / researcher can also re-run on
+demand (for example after fixing OCR or after uploading a previously-
+missing decision so that newer rows now link to it).
+"""
+
+from __future__ import annotations
+
+import json
+from uuid import UUID
+
+from legal_mcp.services import citation_extractor
+
+
+def _ok(payload) -> str:
+    """Serialize a successful tool result as indented, non-ASCII-safe JSON."""
+    # default=str stringifies values json cannot encode natively.
+    dump_options = {"ensure_ascii": False, "indent": 2, "default": str}
+    return json.dumps(payload, **dump_options)
+
+
+def _err(msg: str) -> str:
+    """Wrap ``msg`` in a compact ``{"error": ...}`` JSON object."""
+    error_payload = {"error": msg}
+    return json.dumps(error_payload, ensure_ascii=False)
+
+
+async def extract_internal_citations(
+    case_law_id: str = "",
+    chair_name: str = "",
+    limit: int = 0,
+) -> str:
+    """Extract internal citations and store them in precedent_internal_citations.
+
+    Args:
+        case_law_id: UUID of one specific decision. When given, only that row
+            is processed (typical use: right after an upload).
+        chair_name: chair to restrict the batch to (e.g. 'דפנה תמיר').
+            Blank = all chairs. Used only when ``case_law_id`` is blank.
+        limit: cap on the number of rows in a batch run (0 = no cap).
+
+    Passing both ``case_law_id`` and ``chair_name`` is rejected. With both
+    blank, the whole internal_committee corpus is processed.
+
+    Returns:
+        JSON text: the statistics dict from the extractor, or
+        ``{"error": ...}`` on invalid input or any extractor failure.
+    """
+    row_id = case_law_id.strip()
+    chair = chair_name.strip()
+
+    if row_id and chair:
+        return _err("יש לספק case_law_id או chair_name, לא שניהם")
+
+    if not row_id:
+        # Batch mode: every internal-committee row, optionally one chair only.
+        try:
+            stats = await citation_extractor.extract_all_internal_committee(
+                chair_name_filter=chair,
+                limit=int(limit) if limit else 0,
+            )
+        except Exception as e:
+            return _err(str(e))
+        return _ok(stats)
+
+    # Single-row mode.
+    try:
+        cl_uuid = UUID(row_id)
+    except ValueError:
+        return _err("case_law_id לא תקין")
+    try:
+        stats = await citation_extractor.extract_and_store(cl_uuid)
+    except Exception as e:
+        return _err(str(e))
+    return _ok(stats)
+
+
+async def list_internal_citations(
+    case_law_id: str = "",
+    linked_only: bool = False,
+    limit: int = 50,
+) -> str:
+    """List the outgoing citations of a decision (what this decision cites).
+
+    Args:
+        case_law_id: UUID of the citing case_law row (required).
+        linked_only: True = only citations linked to an existing case_law row.
+        limit: cap on returned items (default 50); values below 1 act as 1.
+
+    Returns:
+        JSON text with ``items`` (at most ``limit`` citations, each carrying
+        target_case_number/name/chair when linked) and ``count`` (the total
+        before the cap). With ``linked_only=False`` unlinked citations come
+        back with ``cited_case_law_id`` null. Invalid input or a lookup
+        failure yields ``{"error": ...}``.
+    """
+    raw_id = case_law_id.strip()
+    if not raw_id:
+        return _err("case_law_id חובה")
+
+    try:
+        cl_uuid = UUID(raw_id)
+    except ValueError:
+        return _err("case_law_id לא תקין")
+
+    try:
+        rows = await citation_extractor.list_citations_for_case_law(
+            cl_uuid, linked_only=bool(linked_only),
+        )
+    except Exception as e:
+        return _err(str(e))
+
+    page_size = max(1, int(limit))
+    return _ok({"items": rows[:page_size], "count": len(rows)})
+
+
+async def list_incoming_citations(
+    case_law_id: str = "",
+    limit: int = 50,
+) -> str:
+    """List the incoming citations of a decision (which decisions cite it).
+
+    Typical use: find out which of Daphna's decisions relied on a given
+    ruling by passing that ruling's case_law_id.
+
+    Args:
+        case_law_id: UUID of the cited (target) case_law row (required).
+        limit: cap on returned items; values below 1 act as 1.
+
+    Returns:
+        JSON text with ``items`` (at most ``limit`` citing rows) and
+        ``count`` (the total before the cap), or ``{"error": ...}`` on
+        invalid input or a lookup failure.
+    """
+    raw_id = case_law_id.strip()
+    if not raw_id:
+        return _err("case_law_id חובה")
+
+    try:
+        cl_uuid = UUID(raw_id)
+    except ValueError:
+        return _err("case_law_id לא תקין")
+
+    try:
+        rows = await citation_extractor.list_citations_to_case_law(cl_uuid)
+    except Exception as e:
+        return _err(str(e))
+
+    page_size = max(1, int(limit))
+    return _ok({"items": rows[:page_size], "count": len(rows)})
--- a/mcp-server/src/legal_mcp/tools/search.py
+++ b/mcp-server/src/legal_mcp/tools/search.py
@@ -189,6 +189,7 @@ async def search_internal_decisions(
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
+    include_cited_by: bool = False,
 ) -> str:
    """חיפוש בהחלטות ועדות ערר לתכנון ובנייה (כל המחוזות).

@@ -200,42 +201,135 @@ async def search_internal_decisions(
        chair_name: שם יו"ר הוועדה לסינון. ריק = כל היו"רים
        limit: מספר תוצאות מקסימלי
        include_halachot: האם לכלול הלכות שחולצו
+        include_cited_by: True = אחרי החיפוש הראשי, הוסף החלטות שה-hits
+            הראשיים מצטטים (מתוך precedent_internal_citations). default False
+            כדי לא לשבור caller-ים קיימים. match_type='cited_by' מציין שזו
+            תוצאה משנית.
    """
    from legal_mcp.services import internal_decisions as int_svc

+    # Bump the limit a bit when we're expanding via citations — the
+    # citation step is cheap and a few extra primary hits make the
+    # expansion more useful.
+    primary_limit = limit if not include_cited_by else max(limit, limit * 2)
+
    results = await int_svc.search_internal(
        query,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        district=district,
        chair_name=chair_name,
-        limit=limit,
+        limit=primary_limit,
        include_halachot=include_halachot,
    )

    if not results:
        return "לא נמצאו החלטות ועדת ערר רלוונטיות."

+    # Cap primary results back to ``limit`` (we over-fetched only to seed
+    # the citation expansion below — the user asked for ``limit`` items).
+    primary = results[:limit]
+
    formatted = []
-    for r in results:
-        entry = {
-            "score": round(float(r["score"]), 4),
-            "type": r.get("type", "passage"),
-            "case_number": r.get("case_number"),
-            "case_name": r.get("case_name"),
-            "court": r.get("court"),
-            "district": r.get("district"),
-            "chair_name": r.get("chair_name"),
-            "decision_date": r.get("decision_date"),
-        }
-        if r.get("type") == "halacha":
-            entry["rule"] = r.get("rule_statement")
-            entry["quote"] = r.get("supporting_quote")
-            entry["rule_type"] = r.get("rule_type")
-        else:
-            entry["content"] = r.get("content", "")
-            entry["section"] = r.get("section_type")
-            entry["page"] = r.get("page_number")
-        formatted.append(entry)
+    seen_case_law_ids: set[str] = set()
+    for r in primary:
+        clid = str(r.get("case_law_id") or "")
+        if clid:
+            seen_case_law_ids.add(clid)
+        formatted.append(_format_internal_row(r, match_type="primary"))
+
+    if include_cited_by and seen_case_law_ids:
+        from uuid import UUID
+        from legal_mcp.services import citation_extractor
+
+        try:
+            source_uuids = [UUID(s) for s in seen_case_law_ids]
+            cited_map = await citation_extractor.get_cited_case_law_ids(source_uuids)
+        except Exception as e:
+            logger.warning("include_cited_by lookup failed: %s", e)
+            cited_map = {}
+
+        # Flatten + dedup the cited case_law_ids that aren't already in
+        # the primary set.
+        cited_ids: set[str] = set()
+        for ids in cited_map.values():
+            for cid in ids:
+                if cid and cid not in seen_case_law_ids:
+                    cited_ids.add(cid)
+
+        if cited_ids:
+            cited_rows = await _fetch_case_law_summaries(list(cited_ids))
+            for row in cited_rows:
+                formatted.append(_format_internal_row(row, match_type="cited_by"))

    return json.dumps(formatted, ensure_ascii=False, indent=2)
+
+
+def _format_internal_row(r: dict, *, match_type: str = "primary") -> dict:
+    """Shape an internal-decision hit (or a cited_by stub) for the MCP response.
+
+    Args:
+        r: a search hit or stub row. Missing keys come out as ``None`` (or
+            ``""`` for ``content``, ``"passage"`` for ``type``).
+        match_type: copied into the entry; ``"primary"`` for direct hits,
+            ``"cited_by"`` for rows added through the citation graph.
+
+    Returns:
+        A dict with the common metadata fields plus either the halacha
+        fields (``rule``, ``quote``, ``rule_type``) when ``r["type"]`` is
+        ``"halacha"``, or the passage fields (``content``, ``section``,
+        ``page``) otherwise.
+    """
+    # A row may carry ``score: None`` (key present, no value); treat that like
+    # a missing score instead of letting float(None) raise.
+    raw_score = r.get("score")
+    score = round(float(raw_score), 4) if raw_score is not None else 0.0
+
+    entry: dict = {
+        "score": score,
+        "type": r.get("type", "passage"),
+        "case_number": r.get("case_number"),
+        "case_name": r.get("case_name"),
+        "court": r.get("court"),
+        "district": r.get("district"),
+        "chair_name": r.get("chair_name"),
+        "decision_date": r.get("decision_date"),
+        "match_type": match_type,
+    }
+    if r.get("type") == "halacha":
+        entry["rule"] = r.get("rule_statement")
+        entry["quote"] = r.get("supporting_quote")
+        entry["rule_type"] = r.get("rule_type")
+    else:
+        entry["content"] = r.get("content", "")
+        entry["section"] = r.get("section_type")
+        entry["page"] = r.get("page_number")
+    return entry
+
+
+async def _fetch_case_law_summaries(case_law_ids: list[str]) -> list[dict]:
+    """Load lightweight metadata for the given case_law ids (cited-by stubs).
+
+    No chunks or halachot are loaded: the point is to show that a related
+    precedent exists, not to repeat the search. Strings that are not valid
+    UUIDs are ignored; if none are valid the result is ``[]``.
+
+    Returns:
+        One dict per row found, with ``decision_date`` as an ISO string when
+        set, ``content`` taken from the headnote, and the fixed values
+        ``score = 0.0`` and ``type = "passage"``.
+    """
+    from uuid import UUID
+
+    pool = await db.get_pool()
+
+    parsed_ids = []
+    for candidate in case_law_ids:
+        try:
+            parsed = UUID(candidate)
+        except ValueError:
+            continue
+        parsed_ids.append(parsed)
+    if not parsed_ids:
+        return []
+
+    async with pool.acquire() as conn:
+        records = await conn.fetch(
+            """
+            SELECT id::text AS case_law_id,
+                   case_number,
+                   case_name,
+                   court,
+                   district,
+                   chair_name,
+                   date AS decision_date,
+                   headnote AS content
+              FROM case_law
+             WHERE id = ANY($1::uuid[])
+            """,
+            parsed_ids,
+        )
+
+    summaries: list[dict] = []
+    for record in records:
+        summary = dict(record)
+        decided_on = summary.get("decision_date")
+        if decided_on is not None:
+            summary["decision_date"] = decided_on.isoformat()
+        # Stubs are context, not ranked hits, hence the zero score.
+        summary.update(score=0.0, type="passage")
+        summaries.append(summary)
+    return summaries