# legal-ai/mcp-server/src/legal_mcp/services/db.py

"""Database service - asyncpg connection pool and queries."""

from __future__ import annotations

import asyncio
import hashlib
import json
import logging
import re
from datetime import date
from uuid import UUID, uuid4

import asyncpg
from pgvector.asyncpg import register_vector

from legal_mcp import config
from legal_mcp.services import halacha_quality

logger = logging.getLogger(__name__)

_pool: asyncpg.Pool | None = None
_schema_ready: bool = False
_init_lock: asyncio.Lock = asyncio.Lock()


async def get_pool() -> asyncpg.Pool:
    """Hand out the shared asyncpg pool, building it on first use.

    Pool creation and schema migration are deferred to the first caller
    instead of being done in the MCP server's `lifespan`. That keeps the
    `initialize`/`tools/list` MCP handshake immediate, so Claude Code never
    sees a stale "No such tool".

    Initialisation is serialised through `_init_lock`. Once both the pool and
    the schema are in place, later calls return without taking the lock.
    """
    global _pool, _schema_ready

    # Lock-free fast path for the steady state.
    fully_initialised = _schema_ready and _pool is not None
    if fully_initialised:
        return _pool

    async with _init_lock:
        # State is re-examined under the lock: another task may have finished
        # some or all of the work while this one was waiting.
        if _pool is None:
            # The extensions have to exist before the pool opens connections,
            # because `_init_connection` registers the vector codec on each
            # one. A throwaway connection is used for that bootstrap step.
            bootstrap = await asyncpg.connect(config.POSTGRES_URL)
            try:
                for statement in (
                    'CREATE EXTENSION IF NOT EXISTS vector',
                    'CREATE EXTENSION IF NOT EXISTS "uuid-ossp"',
                ):
                    await bootstrap.execute(statement)
            finally:
                await bootstrap.close()

            pool_options = {
                "min_size": 2,
                "max_size": 10,
                "init": _init_connection,
            }
            _pool = await asyncpg.create_pool(config.POSTGRES_URL, **pool_options)

        if not _schema_ready:
            await _run_schema_migrations(_pool)
            # Flag flipped only after the migrations returned, so a failed
            # run is attempted again by the next caller.
            _schema_ready = True

    return _pool


async def _init_connection(conn: asyncpg.Connection) -> None:
    """Per-connection setup hook handed to `asyncpg.create_pool(init=...)`.

    Registers the pgvector type codec on ``conn`` via `register_vector`.
    `get_pool()` creates the `vector` extension before the pool is built,
    since the codec is registered only after the extension exists.
    """
    await register_vector(conn)


async def close_pool() -> None:
    """Close the shared connection pool, if one exists.

    The module-level reference is cleared *before* the pool's ``close()`` is
    awaited. A `get_pool()` call that runs while the close is still in
    progress therefore builds a fresh pool instead of being handed one that
    is shutting down, and a failed or cancelled close does not leave a dead
    pool cached in `_pool`.

    `_schema_ready` is deliberately left untouched: a pool re-created after
    this call does not re-run the schema migrations.

    Calling this when no pool exists is a no-op.
    """
    global _pool
    # Detach first, then close: no await happens between reading and clearing
    # the global, so no other task can observe the pool mid-shutdown.
    pool, _pool = _pool, None
    if pool is not None:
        await pool.close()


# ── Schema ──────────────────────────────────────────────────────────

# Base schema: the cases, documents, document_chunks, style_corpus and
# style_patterns tables plus their indexes. The uuid_generate_v4() defaults
# and the vector(1024) column depend on the "uuid-ossp" and vector extensions
# that get_pool() creates before opening the pool. Every statement is guarded
# with IF NOT EXISTS, so the script can be re-run against an existing database.
# The SQL text (including its comments and Hebrew defaults) is sent verbatim.
SCHEMA_SQL = """

CREATE TABLE IF NOT EXISTS cases (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_number TEXT UNIQUE NOT NULL,
    title TEXT NOT NULL,
    appellants JSONB DEFAULT '[]',
    respondents JSONB DEFAULT '[]',
    subject TEXT DEFAULT '',
    property_address TEXT DEFAULT '',
    permit_number TEXT DEFAULT '',
    committee_type TEXT DEFAULT 'ועדה מקומית',
    status TEXT DEFAULT 'new',
    hearing_date DATE,
    decision_date DATE,
    tags JSONB DEFAULT '[]',
    notes TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS documents (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    doc_type TEXT NOT NULL,
    title TEXT NOT NULL,
    file_path TEXT NOT NULL,
    extracted_text TEXT DEFAULT '',
    extraction_status TEXT DEFAULT 'pending',
    page_count INTEGER,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMPTZ DEFAULT now()
);

-- INV-TOOL3 / GAP-52: SHA-256 of the uploaded file bytes, for idempotent upload
-- (re-uploading the same file to a case returns the existing document). Empty
-- default = legacy rows with unknown hash; never matched as a duplicate.
ALTER TABLE documents ADD COLUMN IF NOT EXISTS content_hash text NOT NULL DEFAULT '';

CREATE TABLE IF NOT EXISTS document_chunks (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id UUID REFERENCES documents(id) ON DELETE CASCADE,
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    chunk_index INTEGER NOT NULL,
    content TEXT NOT NULL,
    section_type TEXT DEFAULT 'other',
    embedding vector(1024),
    page_number INTEGER,
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS style_corpus (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
    decision_number TEXT,
    decision_date DATE,
    subject_categories JSONB DEFAULT '[]',
    full_text TEXT NOT NULL,
    summary TEXT DEFAULT '',
    outcome TEXT DEFAULT '',
    key_principles JSONB DEFAULT '[]',
    practice_area TEXT DEFAULT 'appeals_committee',
    appeal_subtype TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS style_patterns (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    pattern_type TEXT NOT NULL,
    pattern_text TEXT NOT NULL,
    frequency INTEGER DEFAULT 1,
    context TEXT DEFAULT '',
    examples JSONB DEFAULT '[]',
    appeal_subtype TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_chunks_embedding
    ON document_chunks USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

CREATE INDEX IF NOT EXISTS idx_chunks_case ON document_chunks(case_id);
CREATE INDEX IF NOT EXISTS idx_chunks_doc ON document_chunks(document_id);
CREATE INDEX IF NOT EXISTS idx_docs_case ON documents(case_id);
CREATE INDEX IF NOT EXISTS idx_cases_status ON cases(status);
CREATE INDEX IF NOT EXISTS idx_cases_number ON cases(case_number);
"""


# Additive migration on top of SCHEMA_SQL: adds cases.expected_outcome and
# creates the audit_log table with indexes on case_id, action and
# created_at DESC. audit_log rows keep existing when their case or document
# is deleted (both foreign keys are ON DELETE SET NULL).
MIGRATIONS_SQL = """
ALTER TABLE cases ADD COLUMN IF NOT EXISTS expected_outcome TEXT DEFAULT '';

CREATE TABLE IF NOT EXISTS audit_log (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    action TEXT NOT NULL,
    case_id UUID REFERENCES cases(id) ON DELETE SET NULL,
    document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
    details JSONB DEFAULT '{}',
    actor TEXT DEFAULT 'system',
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_audit_case ON audit_log(case_id);
CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_log(action);
CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_log(created_at DESC);
"""

# ── Phase 3: Workflow expansion ────────────────────────────────────

# V3 script: extra columns on decisions, cases, style_corpus, style_patterns
# and decision_blocks, plus the decision_lessons, chat_conversations,
# chat_messages, qa_results, decision_definitions and appeal_type_rules tables.
#
# Ordering dependency: this script ALTERs and references `decisions` and
# `decision_blocks`, which are only created in SCHEMA_V2_SQL (defined further
# down in this file). ALTER TABLE has no table-level IF EXISTS here, so on a
# fresh database V2 has to be executed before V3.
# NOTE(review): the execution order lives in _run_schema_migrations — confirm.
#
# The style_corpus / style_patterns ALTERs repeat columns that SCHEMA_SQL's
# CREATE TABLE already declares; ADD COLUMN IF NOT EXISTS makes them no-ops
# when the column is present.
SCHEMA_V3_SQL = """

-- הרחבת decisions עם שדות חדשים
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS direction_doc JSONB DEFAULT NULL;
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS outcome_reasoning TEXT DEFAULT '';

-- הרחבת cases עם appeal_type (אם לא קיים)
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_type TEXT DEFAULT '';
ALTER TABLE cases ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT 'appeals_committee';
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
-- active_draft_path = path to the DOCX that is the current source of truth
-- for this case's decision text. Set to the latest טיוטה-v*.docx after export,
-- or the latest עריכה-v*.docx after user upload. Used by revise_draft to know
-- what file to base Track Changes revisions on.
ALTER TABLE cases ADD COLUMN IF NOT EXISTS active_draft_path TEXT;

-- הרחבת style_corpus עם practice_area / appeal_subtype
ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT 'appeals_committee';
ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';

-- הרחבת style_patterns עם appeal_subtype לניתוח סגנון נפרד לכל סוג ערר
ALTER TABLE style_patterns ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';

-- decision_lessons: per-decision learnings the chair / curator / style_analyzer
-- attaches to a corpus row. The generic legal-decision-lessons.md file stays
-- as the source of truth for cross-corpus patterns; this table stores the
-- granular "what we learned from THIS decision" notes that drive the writer's
-- future drafts and let the curator look up prior observations on the same row.
CREATE TABLE IF NOT EXISTS decision_lessons (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    style_corpus_id UUID NOT NULL REFERENCES style_corpus(id) ON DELETE CASCADE,
    lesson_text TEXT NOT NULL,
    category TEXT DEFAULT 'general',           -- style / structure / lexicon / tabular / general
    source TEXT DEFAULT 'manual',              -- manual / curator / chair / style_analyzer
    applied_to_skill BOOLEAN DEFAULT false,    -- has this been promoted into SKILL.md?
    created_by TEXT DEFAULT 'chaim',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_decision_lessons_corpus ON decision_lessons(style_corpus_id);
CREATE INDEX IF NOT EXISTS idx_decision_lessons_applied ON decision_lessons(applied_to_skill);

-- chat_conversations / chat_messages: persistent history for the
-- "שיחה עם הסוכן" tab on /training. Each conversation can optionally be
-- scoped to a single style_corpus row (when the chair starts a chat
-- "about decision X"). claude_session_id is the value the local claude
-- CLI returns in stream-json — we pass it back via `--resume` on the
-- next message so the model continues the same conversation without
-- re-loading the system prompt every time.
CREATE TABLE IF NOT EXISTS chat_conversations (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    title TEXT NOT NULL DEFAULT 'שיחה חדשה',
    style_corpus_id UUID REFERENCES style_corpus(id) ON DELETE SET NULL,
    claude_session_id TEXT,
    system_prompt_version TEXT DEFAULT 'v1',
    created_at TIMESTAMPTZ DEFAULT now(),
    last_message_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS chat_messages (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES chat_conversations(id) ON DELETE CASCADE,
    role TEXT NOT NULL,                -- 'user' | 'assistant'
    content TEXT NOT NULL,
    raw_events JSONB DEFAULT '[]',     -- stream-json events for the assistant turn (optional, for debug)
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_chat_messages_conv ON chat_messages(conversation_id, created_at);
CREATE INDEX IF NOT EXISTS idx_chat_conv_corpus ON chat_conversations(style_corpus_id);
CREATE INDEX IF NOT EXISTS idx_chat_conv_last ON chat_conversations(last_message_at DESC);

-- טבלת qa_results
CREATE TABLE IF NOT EXISTS qa_results (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    check_name TEXT NOT NULL,
    passed BOOLEAN NOT NULL,
    severity TEXT DEFAULT 'warning',
    errors JSONB DEFAULT '[]',
    details TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_qa_results_decision ON qa_results(decision_id);
CREATE INDEX IF NOT EXISTS idx_qa_results_case ON qa_results(case_id);

-- טבלת decision_definitions (אם לא קיימת)
CREATE TABLE IF NOT EXISTS decision_definitions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    term TEXT NOT NULL,
    definition TEXT NOT NULL,
    block_id TEXT DEFAULT 'block-he',
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_definitions_decision ON decision_definitions(decision_id);

-- טבלת appeal_type_rules (אם לא קיימת)
CREATE TABLE IF NOT EXISTS appeal_type_rules (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    appeal_type TEXT NOT NULL,
    rule_category TEXT NOT NULL,
    rule_key TEXT NOT NULL,
    rule_value JSONB NOT NULL,
    description TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(appeal_type, rule_category, rule_key)
);

-- image_placeholders על decision_blocks
ALTER TABLE decision_blocks ADD COLUMN IF NOT EXISTS image_placeholders JSONB DEFAULT '[]';
"""

# ── Phase 2: Decision + Knowledge + RAG layers ────────────────────

# V2 script, grouped by the layer banners inside the SQL:
#   * Decision layer: decisions, decision_blocks, decision_paragraphs, claims.
#   * Legal knowledge: case_law, case_law_citations, statutory_provisions,
#     transition_phrases, lessons_learned.
#   * Extended RAG: paragraph_embeddings and case_law_embeddings, both with a
#     vector(1024) column and an ivfflat cosine index.
#   * chair_feedback and tag_company_mappings.
# SCHEMA_V3_SQL, SCHEMA_V4_SQL and SCHEMA_V7_SQL alter tables that are created
# here (decisions, decision_blocks, claims, case_law), so this script must have
# run before them on a fresh database.
SCHEMA_V2_SQL = """

-- ═══════════════════════════════════════════════════════════════════
-- Layer 2: Decision
-- ═══════════════════════════════════════════════════════════════════

-- decisions: מטאדטה של החלטה (גרסה אחת = רשומה אחת)
CREATE TABLE IF NOT EXISTS decisions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    version INTEGER DEFAULT 1,
    status TEXT DEFAULT 'draft',               -- draft/review/final/published
    outcome TEXT DEFAULT '',                   -- rejected/accepted/partial
    outcome_summary TEXT DEFAULT '',           -- תמצית תוצאה (שורה אחת)
    total_paragraphs INTEGER DEFAULT 0,
    total_words INTEGER DEFAULT 0,
    decision_date DATE,
    author TEXT DEFAULT 'דפנה תמיר',
    panel_members JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(case_id, version)
);

-- decision_blocks: 12 בלוקים לפי block-schema.md
CREATE TABLE IF NOT EXISTS decision_blocks (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    block_id TEXT NOT NULL,                    -- block-alef, block-bet, ... block-yod-bet
    block_index INTEGER NOT NULL,              -- 1-12
    title TEXT DEFAULT '',                     -- כותרת הבלוק (ריק לבלוקים ללא כותרת)
    content TEXT DEFAULT '',                   -- תוכן מלא (markdown)
    word_count INTEGER DEFAULT 0,
    weight_percent NUMERIC(5,2) DEFAULT 0,     -- משקל בפועל (%)
    generation_type TEXT DEFAULT '',            -- template-fill/reproduction/paraphrase/...
    model_used TEXT DEFAULT '',                 -- sonnet/opus/script
    temperature NUMERIC(3,2) DEFAULT 0,
    status TEXT DEFAULT 'empty',               -- empty/draft/review/final
    notes TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(decision_id, block_id)
);

-- decision_paragraphs: סעיפים בודדים עם מעקב ציטוטים
CREATE TABLE IF NOT EXISTS decision_paragraphs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    block_id UUID REFERENCES decision_blocks(id) ON DELETE CASCADE,
    paragraph_number INTEGER NOT NULL,         -- מספור רציף בתוך ההחלטה
    content TEXT NOT NULL,
    word_count INTEGER DEFAULT 0,
    citations JSONB DEFAULT '[]',              -- [{case_law_id, text, type}]
    cross_references JSONB DEFAULT '[]',       -- הפניות לסעיפים אחרים ["סעיף 5 לעיל"]
    created_at TIMESTAMPTZ DEFAULT now()
);

-- claims: טענות צדדים (בלוק ז)
CREATE TABLE IF NOT EXISTS claims (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    party_role TEXT NOT NULL,                  -- appellant/respondent/permit_applicant/committee
    party_name TEXT DEFAULT '',
    claim_text TEXT NOT NULL,
    claim_index INTEGER DEFAULT 0,             -- סדר הופעה
    source_document TEXT DEFAULT '',            -- מאיזה מסמך חולצה הטענה
    addressed_in_paragraph INTEGER,            -- באיזה סעיף בדיון נענתה
    created_at TIMESTAMPTZ DEFAULT now()
);

-- ═══════════════════════════════════════════════════════════════════
-- Layer 3: Legal Knowledge
-- ═══════════════════════════════════════════════════════════════════

-- case_law: פסיקה (תקדימים)
CREATE TABLE IF NOT EXISTS case_law (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_number TEXT UNIQUE NOT NULL,          -- עע"מ 3975/22 או ערר 1011-03-25
    case_name TEXT NOT NULL,                   -- שם קצר: "ב. קרן-נכסים"
    court TEXT DEFAULT '',                     -- בג"ץ / עליון / מנהלי / ועדת ערר
    date DATE,
    subject_tags JSONB DEFAULT '[]',           -- ["proprietary_claims", "parking"]
    summary TEXT DEFAULT '',                   -- תמצית 2-3 משפטים
    key_quote TEXT DEFAULT '',                 -- ציטוט מרכזי
    full_text TEXT DEFAULT '',                 -- טקסט מלא אם זמין
    source_url TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now()
);

-- case_law_citations: קשרים בין פסיקה להחלטות שלנו
CREATE TABLE IF NOT EXISTS case_law_citations (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE SET NULL,
    citation_type TEXT DEFAULT 'support',      -- support/distinguish/overrule/obiter
    context_text TEXT DEFAULT '',              -- ההקשר שבו צוטט
    created_at TIMESTAMPTZ DEFAULT now()
);

-- statutory_provisions: חקיקה נפוצה
CREATE TABLE IF NOT EXISTS statutory_provisions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    statute_name TEXT NOT NULL,                -- "חוק התכנון והבנייה"
    section_number TEXT NOT NULL,              -- "152(א)(2)"
    section_title TEXT DEFAULT '',             -- "זכות ערר"
    full_text TEXT DEFAULT '',                 -- נוסח הסעיף
    common_usage TEXT DEFAULT '',              -- מתי משתמשים
    subject_tags JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(statute_name, section_number)
);

-- transition_phrases: ביטויי מעבר של דפנה
CREATE TABLE IF NOT EXISTS transition_phrases (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    phrase TEXT UNIQUE NOT NULL,               -- "ועל מנת לא לצאת בחסר"
    usage_context TEXT DEFAULT '',             -- מתי להשתמש
    block_types JSONB DEFAULT '[]',            -- באילו בלוקים: ["block-yod"]
    frequency INTEGER DEFAULT 1,              -- כמה פעמים ראינו
    source_decision TEXT DEFAULT '',           -- מאיזו החלטה
    created_at TIMESTAMPTZ DEFAULT now()
);

-- lessons_learned: לקחים מהשוואת טיוטות לגרסאות סופיות
CREATE TABLE IF NOT EXISTS lessons_learned (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    lesson_title TEXT NOT NULL,                -- "Discussion = continuous essay, no sub-headers"
    lesson_text TEXT NOT NULL,                 -- תיאור מלא
    category TEXT DEFAULT '',                  -- structure/style/content/process
    applies_to JSONB DEFAULT '[]',             -- ["block-yod", "all"]
    source_case TEXT DEFAULT '',               -- "הכט 1180-1181"
    severity TEXT DEFAULT 'important',         -- critical/important/nice-to-have
    created_at TIMESTAMPTZ DEFAULT now()
);

-- ═══════════════════════════════════════════════════════════════════
-- Layer 4: Extended RAG
-- ═══════════════════════════════════════════════════════════════════

-- paragraph_embeddings: embeddings של סעיפים בהחלטות
CREATE TABLE IF NOT EXISTS paragraph_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE CASCADE,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);

-- case_law_embeddings: embeddings של פסיקה
CREATE TABLE IF NOT EXISTS case_law_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    chunk_text TEXT NOT NULL,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);

-- ═══════════════════════════════════════════════════════════════════
-- Chair Feedback (הערות דפנה על טיוטות)
-- ═══════════════════════════════════════════════════════════════════

CREATE TABLE IF NOT EXISTS chair_feedback (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE SET NULL,
    block_id TEXT DEFAULT '',                    -- block-yod, block-vav, etc.
    feedback_text TEXT NOT NULL,                 -- ההערה של דפנה
    category TEXT DEFAULT 'other',              -- missing_content/wrong_tone/wrong_structure/factual_error/style/other
    lesson_extracted TEXT DEFAULT '',            -- הלקח שהופק
    applied_to TEXT[] DEFAULT '{}',             -- לאילו קבצים/כללים הלקח יושם
    resolved BOOLEAN DEFAULT FALSE,             -- האם הלקח יושם
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE IF NOT EXISTS tag_company_mappings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    tag TEXT NOT NULL,                            -- appeal_subtype value (e.g. building_permit)
    tag_label TEXT NOT NULL DEFAULT '',            -- Hebrew display label
    company_id TEXT NOT NULL,                     -- Paperclip company UUID
    company_name TEXT NOT NULL DEFAULT '',         -- cached company name for display
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(tag, company_id)
);

-- ═══════════════════════════════════════════════════════════════════
-- Indexes
-- ═══════════════════════════════════════════════════════════════════

CREATE INDEX IF NOT EXISTS idx_decisions_case ON decisions(case_id);
CREATE INDEX IF NOT EXISTS idx_decisions_status ON decisions(status);
CREATE INDEX IF NOT EXISTS idx_decision_blocks_decision ON decision_blocks(decision_id);
CREATE INDEX IF NOT EXISTS idx_decision_blocks_block_id ON decision_blocks(block_id);
CREATE INDEX IF NOT EXISTS idx_decision_paragraphs_block ON decision_paragraphs(block_id);
CREATE INDEX IF NOT EXISTS idx_claims_case ON claims(case_id);
CREATE INDEX IF NOT EXISTS idx_claims_role ON claims(party_role);
CREATE INDEX IF NOT EXISTS idx_case_law_subject ON case_law USING gin(subject_tags);
CREATE INDEX IF NOT EXISTS idx_case_law_citations_decision ON case_law_citations(decision_id);
CREATE INDEX IF NOT EXISTS idx_statutory_provisions_statute ON statutory_provisions(statute_name);
CREATE INDEX IF NOT EXISTS idx_transition_phrases_block ON transition_phrases USING gin(block_types);
CREATE INDEX IF NOT EXISTS idx_lessons_category ON lessons_learned(category);
CREATE INDEX IF NOT EXISTS idx_paragraph_embeddings_vec
    ON paragraph_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
CREATE INDEX IF NOT EXISTS idx_case_law_embeddings_vec
    ON case_law_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
"""


# ── Phase 4: Methodology alignment ──────────────────────────────

# V4 script: additive columns only, no new tables.
#   * claims: claim_type, claim_handling, bundle_group, handling_reason.
#   * cases: standard_of_review, subject_categories.
#   * case_law: precedent_level, is_binding, creac_role.
#   * decisions: issue_order, claim_handling.
# plus indexes on claims(claim_handling), claims(claim_type) and
# case_law(precedent_level). The indented `--` line under each ALTER is a SQL
# comment listing the values or meaning of the column added just above it.
# claims, case_law and decisions are created in SCHEMA_V2_SQL.
SCHEMA_V4_SQL = """

-- ═══════════════════════════════════════════════════════════════════
-- V4: Methodology alignment (decision-methodology.md)
-- ═══════════════════════════════════════════════════════════════════

-- claims: טיפול בטענות (bundle/skip) + סוג טענה
ALTER TABLE claims ADD COLUMN IF NOT EXISTS claim_type TEXT DEFAULT 'claim';
    -- claim / response / reply
ALTER TABLE claims ADD COLUMN IF NOT EXISTS claim_handling TEXT DEFAULT 'address';
    -- address (דיון מלא) / bundle (קיבוץ) / skip (דילוג)
ALTER TABLE claims ADD COLUMN IF NOT EXISTS bundle_group TEXT DEFAULT '';
    -- שם הקבוצה לקיבוץ (למשל "פגמים פרוצדורליים")
ALTER TABLE claims ADD COLUMN IF NOT EXISTS handling_reason TEXT DEFAULT '';
    -- נימוק לדילוג/קיבוץ (למשל "נבחנה ולא מצאנו ממש")

-- cases: תקן ביקורת + קטגוריות נושא
ALTER TABLE cases ADD COLUMN IF NOT EXISTS standard_of_review TEXT DEFAULT '';
    -- "שיקול דעת תכנוני עצמאי" / "בחינת שומה מכרעת" / ...
ALTER TABLE cases ADD COLUMN IF NOT EXISTS subject_categories JSONB DEFAULT '[]';
    -- ["חניה", "קווי בניין", "גובה", "שימוש חורג", ...]

-- case_law: רמת תקדים + מעמד
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS precedent_level TEXT DEFAULT '';
    -- עליון / מנהלי / ועדת ערר ארצית / ועדת ערר מחוזית
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS is_binding BOOLEAN DEFAULT TRUE;
    -- הלכה מחייבת (true) / אמרת אגב (false)
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS creac_role TEXT DEFAULT '';
    -- rule (הנחה עליונה) / explanation (הרחבה) / analogy (אנלוגיה)

-- decisions: סדר סוגיות + תקן ביקורת
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS issue_order JSONB DEFAULT '[]';
    -- סדר הסוגיות שנקבע ע"י המנצח: [{"title": "...", "type": "threshold/dispositive/secondary"}]
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS claim_handling JSONB DEFAULT '{}';
    -- {"overrides": [{"claim_id": "...", "handling": "bundle", "group": "..."}]}

-- indexes
CREATE INDEX IF NOT EXISTS idx_claims_handling ON claims(claim_handling);
CREATE INDEX IF NOT EXISTS idx_claims_type ON claims(claim_type);
CREATE INDEX IF NOT EXISTS idx_case_law_level ON case_law(precedent_level);
"""


# ── Phase 5: Interim draft (appraiser facts + post-hearing flag) ───

# V5 script: creates appraiser_facts (one row per plan or permit identifier,
# per appraiser, per document; fact_type is CHECK-constrained to 'plan' or
# 'permit'), then adds the appraiser_side column (V5.1) and composite indexes
# on (case_id, fact_type), (case_id, identifier) and (case_id, appraiser_side).
# The trailing SQL comments document two keys stored in the existing
# documents.metadata JSONB column; they introduce no schema change.
SCHEMA_V5_SQL = """

-- appraiser_facts: תכניות והיתרים שצוינו ע"י כל שמאי בנפרד.
-- בשונה מ-claims (שהוא טענה משפטית), כאן מאוחסנת עובדה עניינית מתוך השומה.
-- שימוש ראשי: זיהוי סתירות בין שמאים על איזו תכנית או היתר חל בנכס.
CREATE TABLE IF NOT EXISTS appraiser_facts (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID NOT NULL REFERENCES cases(id) ON DELETE CASCADE,
    document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    appraiser_name TEXT NOT NULL,
    fact_type TEXT NOT NULL CHECK (fact_type IN ('plan', 'permit')),
    identifier TEXT NOT NULL,
    details JSONB NOT NULL DEFAULT '{}',
    page_number INTEGER,
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_appraiser_facts_case ON appraiser_facts(case_id, fact_type);
CREATE INDEX IF NOT EXISTS idx_appraiser_facts_identifier ON appraiser_facts(case_id, identifier);

-- V5.1: appraiser_side — which party this appraiser represents.
-- Values: 'committee' (הוועדה), 'appellant' (העורר), 'deciding' (מכריע).
-- Required by extract_appraiser_facts; the chair tags it via the UI before extraction.
-- Set via documents.metadata.appraiser_side at upload/edit time, then propagated here
-- so that conflict rendering in block-tet can label each entry with its side.
ALTER TABLE appraiser_facts ADD COLUMN IF NOT EXISTS appraiser_side TEXT DEFAULT '';
CREATE INDEX IF NOT EXISTS idx_appraiser_facts_side ON appraiser_facts(case_id, appraiser_side);

-- documents.metadata.is_post_hearing: flag for materials submitted after the hearing
-- (השלמות טיעון, הצעות פשרה). Used by block-chet to include them in the proceedings narrative.
-- documents.metadata.appraiser_side: which side the appraiser represents (see above).
-- No schema change needed — uses existing JSONB metadata column.
"""

# ── V6: Case archiving ────────────────────────────────────────────

# V6 script: adds the nullable cases.archived_at timestamp and a partial index
# that covers only rows where archived_at IS NOT NULL (i.e. archived cases).
SCHEMA_V6_SQL = """
-- archived_at: timestamp when the case was moved to the archive screen.
-- NULL = active (default). Set via POST /api/cases/{case_number}/archive.
-- Cleared via POST /api/cases/{case_number}/restore.
-- The /api/cases endpoint filters out archived cases by default;
-- pass ?include_archived=true (or use /api/cases/archived) to see them.
ALTER TABLE cases ADD COLUMN IF NOT EXISTS archived_at TIMESTAMPTZ;
CREATE INDEX IF NOT EXISTS idx_cases_archived ON cases(archived_at) WHERE archived_at IS NOT NULL;
"""


# ── V7: External Precedent Library + halacha extraction ──────────
# Chair-uploaded external court rulings and other appeals committee decisions
# become an authoritative law corpus. Distinct from style_corpus (Daphna's
# style) and case_precedents (chair-attached quotes scoped to a single case).

# V7 script, in order:
#   1. Extends case_law (created in SCHEMA_V2_SQL) with source/ingestion
#      columns: source_kind, document_id, the three *_extraction_status
#      columns, practice_area, appeal_subtype, headnote, source_type.
#   2. Adds the case_law_practice_area_check constraint inside a DO block.
#      ADD CONSTRAINT has no IF NOT EXISTS form, so the block swallows
#      duplicate_object to keep the script re-runnable.
#   3. Creates precedent_chunks and halachot, each with a vector(1024)
#      embedding column and an ivfflat cosine index.
#   4. Creates a UNIQUE index on halachot(case_law_id, halacha_index). Unlike
#      the other statements this one can fail on a database that already holds
#      duplicate (case_law_id, halacha_index) pairs — see the SQL comment above it.
SCHEMA_V7_SQL = """
-- case_law extensions: distinguish chair-uploaded full rulings from
-- auto-extracted citation stubs, and track ingestion progress.
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_kind TEXT DEFAULT 'cited_only';
    -- 'external_upload' (chair uploaded full ruling) | 'cited_only' (stub from
    -- references_extractor) | 'nevo_seed' (future: auto-fetched from Nevo).
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS document_id UUID REFERENCES documents(id) ON DELETE SET NULL;
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS extraction_status TEXT DEFAULT 'pending';
    -- 'pending' | 'processing' | 'completed' | 'failed'
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_status TEXT DEFAULT 'pending';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS metadata_extraction_status TEXT DEFAULT 'pending';
    -- 'pending' | 'processing' | 'completed' | 'failed'. Mirrors the
    -- text/halacha status columns so the UI can show a live badge while the
    -- local-MCP worker drains the metadata queue (previously only the
    -- metadata_extraction_requested_at timestamp existed — no 'processing').
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS headnote TEXT DEFAULT '';
    -- chair-editable abstract shown in search results.
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_type TEXT DEFAULT '';
    -- 'court_ruling' | 'appeals_committee'

-- practice_area is closed to the three appeals committee domains.
DO $$ BEGIN
    ALTER TABLE case_law ADD CONSTRAINT case_law_practice_area_check
        CHECK (practice_area IN ('', 'rishuy_uvniya', 'betterment_levy', 'compensation_197'));
EXCEPTION WHEN duplicate_object THEN NULL; END $$;

CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
CREATE INDEX IF NOT EXISTS idx_case_law_practice ON case_law(practice_area, appeal_subtype);

-- precedent_chunks: full-text chunks of an uploaded ruling, with embeddings.
-- Analog of document_chunks for case_law rows where source_kind='external_upload'.
CREATE TABLE IF NOT EXISTS precedent_chunks (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    chunk_index INTEGER NOT NULL,
    content TEXT NOT NULL,
    section_type TEXT DEFAULT 'other',
        -- intro | facts | legal_analysis | ruling | conclusion | other
    page_number INTEGER,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_case_law ON precedent_chunks(case_law_id);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_section ON precedent_chunks(case_law_id, section_type);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_vec
    ON precedent_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);

-- halachot: extracted binding rules. One halacha = one rule + verbatim quote.
-- Embedded separately for rule-precision semantic match (chunks centroid is
-- dominated by surrounding context). All halachot start as pending_review;
-- only approved/published rows are visible to search_precedent_library.
CREATE TABLE IF NOT EXISTS halachot (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    halacha_index INTEGER NOT NULL,
    rule_statement TEXT NOT NULL,
    rule_type TEXT DEFAULT 'binding',
        -- binding | interpretive | procedural | obiter
    reasoning_summary TEXT DEFAULT '',
    supporting_quote TEXT NOT NULL,
    page_reference TEXT DEFAULT '',
    practice_areas TEXT[] DEFAULT '{}',
    subject_tags TEXT[] DEFAULT '{}',
    cites TEXT[] DEFAULT '{}',
    confidence NUMERIC(3,2) DEFAULT 0.0,
    quote_verified BOOLEAN DEFAULT FALSE,
    review_status TEXT DEFAULT 'pending_review',
        -- pending_review | approved | rejected | published | deferred (#84 snooze)
    reviewer TEXT DEFAULT '',
    reviewed_at TIMESTAMPTZ,
    quality_flags TEXT[] DEFAULT '{}',
        -- non_decision | truncated_quote | thin_restatement | quote_unverified
        -- (any flag blocks auto-approve → routes to pending_review)
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);
ALTER TABLE halachot ADD COLUMN IF NOT EXISTS quality_flags TEXT[] DEFAULT '{}';
CREATE INDEX IF NOT EXISTS idx_halachot_case_law ON halachot(case_law_id);
CREATE INDEX IF NOT EXISTS idx_halachot_status ON halachot(review_status);
CREATE INDEX IF NOT EXISTS idx_halachot_practice ON halachot USING gin(practice_areas);
CREATE INDEX IF NOT EXISTS idx_halachot_tags ON halachot USING gin(subject_tags);
CREATE INDEX IF NOT EXISTS idx_halachot_vec
    ON halachot USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
-- #83: halacha_index must be unique per precedent. The extractor assigns it as
-- MAX(halacha_index)+1 under an in-process store-lock + a cross-process advisory
-- lock, so collisions shouldn't occur — but per FireHydrant/OneUptime the
-- constraint is the actual correctness guarantee (the lock is the optimization).
-- A racing/double run now fails LOUDLY instead of silently appending duplicates
-- (the 2026-05/06 over-extraction root cause). Requires clean data first (see
-- scripts: the 6 colliding precedents were renumbered 2026-06-03).
CREATE UNIQUE INDEX IF NOT EXISTS idx_halachot_unique_index
    ON halachot(case_law_id, halacha_index);
"""


# ── V8: Extraction request queue ─────────────────────────────────
# Web UI buttons ("Sparkles" = request metadata extraction; "Refresh" =
# request halacha extraction) run inside the FastAPI container, which has
# no `claude` CLI. They can't run the LLM extractor directly. Instead they
# stamp a request timestamp here, and the chair (or me) runs the MCP tool
# `precedent_process_pending_extractions` from local Claude Code, where the
# CLI is available, to drain the queue. See claude_session.py for the rule.

# DDL summary: two nullable request timestamps on case_law (one per
# extraction kind), each backed by a partial index that only covers rows
# where the timestamp IS NOT NULL, i.e. rows with a stamped request.
SCHEMA_V8_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS metadata_extraction_requested_at TIMESTAMPTZ;
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_requested_at TIMESTAMPTZ;
CREATE INDEX IF NOT EXISTS idx_case_law_metadata_requested
    ON case_law(metadata_extraction_requested_at)
    WHERE metadata_extraction_requested_at IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_case_law_halacha_requested
    ON case_law(halacha_extraction_requested_at)
    WHERE halacha_extraction_requested_at IS NOT NULL;
"""


# ── V9: Multimodal page-image embeddings ─────────────────────────
# voyage-multimodal-3 (1024-dim) embeds the whole page as an image:
# captures table layout, scanned content, signatures, plans — content
# that text-OCR loses. Ingestion is gated by config.MULTIMODAL_ENABLED;
# search_*_hybrid() merge text-cosine + image-cosine when present.
# image_thumbnail_path is a relative path under DATA_DIR/cases/{case}/
# thumbnails/ or DATA_DIR/precedent-library/thumbnails/ — a small JPEG
# rendered at config.MULTIMODAL_THUMB_DPI for UI preview, distinct from
# the higher-DPI render fed to the embedder (which is not persisted).

# DDL summary: two parallel tables of per-page image embeddings —
# document_image_embeddings (keyed to documents + cases) and
# precedent_image_embeddings (keyed to case_law). Each stores a
# vector(1024) with an ivfflat cosine index (lists = 50), allows at most one
# row per (owner id, page_number), and is removed by ON DELETE CASCADE when
# its owning row is deleted.
SCHEMA_V9_SQL = """
CREATE TABLE IF NOT EXISTS document_image_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id UUID REFERENCES documents(id) ON DELETE CASCADE,
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    page_number INTEGER NOT NULL,
    image_thumbnail_path TEXT,
    embedding vector(1024),
    model_name TEXT DEFAULT 'voyage-multimodal-3',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(document_id, page_number)
);
CREATE INDEX IF NOT EXISTS idx_doc_img_emb_vec
    ON document_image_embeddings USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 50);
CREATE INDEX IF NOT EXISTS idx_doc_img_emb_doc
    ON document_image_embeddings(document_id);
CREATE INDEX IF NOT EXISTS idx_doc_img_emb_case
    ON document_image_embeddings(case_id);

CREATE TABLE IF NOT EXISTS precedent_image_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    page_number INTEGER NOT NULL,
    image_thumbnail_path TEXT,
    embedding vector(1024),
    model_name TEXT DEFAULT 'voyage-multimodal-3',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(case_law_id, page_number)
);
CREATE INDEX IF NOT EXISTS idx_prec_img_emb_vec
    ON precedent_image_embeddings USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 50);
CREATE INDEX IF NOT EXISTS idx_prec_img_emb_case_law
    ON precedent_image_embeddings(case_law_id);
"""


# ── V10: chair / district metadata ───────────────────────────────
# Adds chair_name and district (TEXT, default '') to case_law and
# chair_name to cases. Indexes case_law.source_kind, plus partial indexes
# on chair_name / district that leave out rows still holding the
# empty-string default.
SCHEMA_V10_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS chair_name TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS district TEXT DEFAULT '';
ALTER TABLE cases ADD COLUMN IF NOT EXISTS chair_name TEXT DEFAULT '';
CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
CREATE INDEX IF NOT EXISTS idx_case_law_chair ON case_law(chair_name) WHERE chair_name <> '';
CREATE INDEX IF NOT EXISTS idx_case_law_district ON case_law(district) WHERE district <> '';
"""

# ── V11: case_law ↔ case_law relations ───────────────────────────
# Link table between two case_law rows. relation_type defaults to
# 'same_case_chain'. The CHECK rejects self-links and both FKs cascade, so
# a relation disappears when either endpoint is deleted. The UNIQUE
# constraint is on the ordered pair, so (A, B) and (B, A) are distinct rows
# as far as this DDL is concerned.
SCHEMA_V11_SQL = """
CREATE TABLE IF NOT EXISTS case_law_relations (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    case_law_id UUID NOT NULL REFERENCES case_law(id) ON DELETE CASCADE,
    related_id   UUID NOT NULL REFERENCES case_law(id) ON DELETE CASCADE,
    relation_type TEXT NOT NULL DEFAULT 'same_case_chain',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(case_law_id, related_id),
    CHECK (case_law_id <> related_id)
);
CREATE INDEX IF NOT EXISTS idx_clr_a ON case_law_relations(case_law_id);
CREATE INDEX IF NOT EXISTS idx_clr_b ON case_law_relations(related_id);
"""

# ── V12: BM25/lexical search via tsvector ─────────────────────────
# PostgreSQL doesn't ship a Hebrew stemmer; the 'simple' configuration
# lowercases + tokenises on whitespace without stemming — exactly what
# we want for Hebrew. It also preserves alphanumeric tokens like
# "1461/20" (case numbers) which are the prime motivator for adding a
# lexical layer on top of the semantic cosine index.
# Both columns are GENERATED STORED so they stay in sync with the
# source rows for free, and GIN-indexed for ts_rank_cd lookups.
# DDL summary: precedent_chunks.content_tsv is derived from ``content``;
# halachot.rule_tsv is derived from rule_statement + supporting_quote +
# reasoning_summary joined with spaces (NULLs coalesced to ''). Each
# generated column gets its own GIN index.
SCHEMA_V12_SQL = """
ALTER TABLE precedent_chunks
    ADD COLUMN IF NOT EXISTS content_tsv tsvector
    GENERATED ALWAYS AS (to_tsvector('simple', content)) STORED;

ALTER TABLE halachot
    ADD COLUMN IF NOT EXISTS rule_tsv tsvector
    GENERATED ALWAYS AS (
        to_tsvector('simple',
            coalesce(rule_statement,'') || ' ' ||
            coalesce(supporting_quote,'') || ' ' ||
            coalesce(reasoning_summary,'')
        )
    ) STORED;

CREATE INDEX IF NOT EXISTS idx_precedent_chunks_tsv
    ON precedent_chunks USING GIN(content_tsv);

CREATE INDEX IF NOT EXISTS idx_halachot_tsv
    ON halachot USING GIN(rule_tsv);
"""


# ── V13: Missing precedents log ───────────────────────────────────
# Track citations that the parties brought up but which are NOT yet in
# the precedent_library. Created by the researcher (auto or chair)
# whenever a citation can't be found in the corpus; closed by uploading
# the actual decision via internal_decision_upload or
# precedent_library_upload, at which point linked_case_law_id points to
# the new case_law row and status flips to 'closed'.
# DDL summary: cited_by_party and status are restricted by CHECK lists
# (status starts as 'open'). Deleting the citing case removes the row
# (CASCADE); deleting the citing document or the linked case_law row only
# nulls the reference (SET NULL). Indexed by citing case, status and
# citation text.
SCHEMA_V13_SQL = """
CREATE TABLE IF NOT EXISTS missing_precedents (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    citation TEXT NOT NULL,
    case_name TEXT,
    cited_in_case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    cited_in_document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
    cited_by_party TEXT CHECK (cited_by_party IN (
        'appellant', 'respondent', 'committee', 'permit_applicant', 'unknown'
    )),
    cited_by_party_name TEXT,
    legal_topic TEXT,
    legal_issue TEXT,
    claim_quote TEXT,
    status TEXT DEFAULT 'open' CHECK (status IN (
        'open', 'uploaded', 'closed', 'irrelevant'
    )),
    linked_case_law_id UUID REFERENCES case_law(id) ON DELETE SET NULL,
    closed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),
    notes TEXT
);

CREATE INDEX IF NOT EXISTS idx_missing_precedents_case
    ON missing_precedents(cited_in_case_id);
CREATE INDEX IF NOT EXISTS idx_missing_precedents_status
    ON missing_precedents(status);
CREATE INDEX IF NOT EXISTS idx_missing_precedents_citation
    ON missing_precedents(citation);
"""


# ── V14: Legal arguments (aggregated propositions) ────────────────
# After ``claims_extractor`` extracts raw propositions (rows in ``claims``)
# the LLM-driven aggregator groups them into ~6-12 distinct legal arguments
# per party. ``legal_arguments`` holds the consolidated argument; the M:M
# join table ``legal_argument_propositions`` links back to the source
# propositions for traceability ("which raw claims feed this argument?").
# DDL summary: legal_arguments rows belong to a case (CASCADE on delete);
# party and priority are restricted by CHECK lists, priority defaulting to
# 'substantive'. argument_index is required but this DDL declares no
# uniqueness constraint on it. The join table's composite primary key
# prevents linking the same claim to the same argument twice.
SCHEMA_V14_SQL = """
CREATE TABLE IF NOT EXISTS legal_arguments (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    case_id UUID NOT NULL REFERENCES cases(id) ON DELETE CASCADE,
    party TEXT NOT NULL CHECK (party IN (
        'appellant', 'respondent', 'committee', 'permit_applicant', 'unknown'
    )),
    argument_index INTEGER NOT NULL,
    argument_title TEXT NOT NULL,
    argument_body TEXT NOT NULL,
    legal_topic TEXT,
    priority TEXT DEFAULT 'substantive' CHECK (priority IN (
        'threshold', 'substantive', 'procedural', 'relief'
    )),
    cited_precedents TEXT[],
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_legal_arguments_case
    ON legal_arguments(case_id);
CREATE INDEX IF NOT EXISTS idx_legal_arguments_party
    ON legal_arguments(case_id, party);

-- M:M back to ``claims`` (raw propositions).
CREATE TABLE IF NOT EXISTS legal_argument_propositions (
    argument_id UUID NOT NULL REFERENCES legal_arguments(id) ON DELETE CASCADE,
    claim_id UUID NOT NULL REFERENCES claims(id) ON DELETE CASCADE,
    PRIMARY KEY (argument_id, claim_id)
);
"""


# ── V15: proceeding_type ──────────────────────────────────────────
# proceeding_type distinguishes a main appeal proceeding ('ערר') from a
# request for an extension of time ('בל"מ'). It applies both to case_law
# (the corpus) and to cases (live cases). Both proceeding types can share
# the same case_number, so uniqueness moves to (case_number,
# proceeding_type). In the corpus only internal_committee rows get a
# populated value; external case law stays with ''.
#
# The script backfills existing rows from appeal_subtype (values matching
# 'extension_request_%' become 'בל"מ'), and each CHECK constraint is dropped
# and re-added on every run rather than guarded by IF NOT EXISTS.
SCHEMA_V15_SQL = """
-- ------- case_law (קורפוס) -------
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS proceeding_type TEXT NOT NULL DEFAULT '';

ALTER TABLE case_law DROP CONSTRAINT IF EXISTS case_law_proceeding_type_check;
ALTER TABLE case_law ADD CONSTRAINT case_law_proceeding_type_check
    CHECK (proceeding_type IN ('', 'ערר', 'בל"מ'));

-- Backfill לפי appeal_subtype הקיים
UPDATE case_law SET proceeding_type = 'בל"מ'
 WHERE source_kind = 'internal_committee' AND proceeding_type = ''
   AND appeal_subtype LIKE 'extension_request_%';

UPDATE case_law SET proceeding_type = 'ערר'
 WHERE source_kind = 'internal_committee' AND proceeding_type = '';

ALTER TABLE case_law DROP CONSTRAINT IF EXISTS case_law_internal_proceeding_check;
ALTER TABLE case_law ADD CONSTRAINT case_law_internal_proceeding_check
    CHECK (source_kind != 'internal_committee' OR proceeding_type IN ('ערר', 'בל"מ'));

-- החלפת UNIQUE(case_number) ב-partial unique לפי source_kind
ALTER TABLE case_law DROP CONSTRAINT IF EXISTS case_law_case_number_key;
DROP INDEX IF EXISTS case_law_case_number_key;
CREATE UNIQUE INDEX IF NOT EXISTS uq_case_law_internal_number_proc
    ON case_law (case_number, proceeding_type)
    WHERE source_kind = 'internal_committee';
CREATE UNIQUE INDEX IF NOT EXISTS uq_case_law_external_number
    ON case_law (case_number)
    WHERE source_kind <> 'internal_committee';

-- ------- cases (תיקים חיים) -------
ALTER TABLE cases ADD COLUMN IF NOT EXISTS proceeding_type TEXT NOT NULL DEFAULT 'ערר';

ALTER TABLE cases DROP CONSTRAINT IF EXISTS cases_proceeding_type_check;
ALTER TABLE cases ADD CONSTRAINT cases_proceeding_type_check
    CHECK (proceeding_type IN ('ערר', 'בל"מ'));

UPDATE cases SET proceeding_type = 'בל"מ'
 WHERE proceeding_type = 'ערר' AND appeal_subtype LIKE 'extension_request_%';

ALTER TABLE cases DROP CONSTRAINT IF EXISTS cases_case_number_key;
DROP INDEX IF EXISTS cases_case_number_key;
CREATE UNIQUE INDEX IF NOT EXISTS uq_cases_number_proc
    ON cases (case_number, proceeding_type);
"""


# ── V16: Internal citations graph (TaskMaster #34) ────────────────
# Auto-extracted citation graph between Daphna's (and other internal_committee)
# decisions. When an internal decision cites another committee decision in a
# patterned way ("ונפנה ל…", "כפי שקבעתי…", "ראה החלטתי…"), the citation
# extractor records the link here. ``cited_case_law_id`` is populated when the
# cited case_number resolves to a row in ``case_law``; otherwise it stays NULL
# and shows up in ``idx_pic_unlinked`` so the chair can decide whether to
# upload the missing decision.
# DDL summary: at most one row per (source decision, cited case number).
# Deleting the source decision removes its citation rows (CASCADE);
# deleting the cited decision only nulls cited_case_law_id (SET NULL), which
# puts the row back under the partial index idx_pic_unlinked. confidence
# defaults to 0.85.
SCHEMA_V16_SQL = """
CREATE TABLE IF NOT EXISTS precedent_internal_citations (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    source_case_law_id UUID NOT NULL REFERENCES case_law(id) ON DELETE CASCADE,
    cited_case_number TEXT NOT NULL,
    cited_case_law_id UUID REFERENCES case_law(id) ON DELETE SET NULL,
    match_context TEXT,
    match_pattern TEXT,
    confidence NUMERIC(3,2) DEFAULT 0.85,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    UNIQUE (source_case_law_id, cited_case_number)
);
CREATE INDEX IF NOT EXISTS idx_pic_source
    ON precedent_internal_citations(source_case_law_id);
CREATE INDEX IF NOT EXISTS idx_pic_target
    ON precedent_internal_citations(cited_case_law_id);
CREATE INDEX IF NOT EXISTS idx_pic_unlinked
    ON precedent_internal_citations(cited_case_number)
    WHERE cited_case_law_id IS NULL;
"""


# ── V17: Parent-doc retrieval (TaskMaster #48) ─────────────────────
# Hierarchical chunking: tiny "child" chunks (~300 tokens) are indexed
# and matched at search time for high recall on focused phrases, but
# every child links upward to a larger "parent" chunk (~1500 tokens)
# that supplies broader context to the LLM. The retrieval step swaps
# the child hit for its parent before returning rows to callers — so
# rule statements, multi-paragraph quotes, and "אשר על כן…" passages
# come back whole instead of clipped mid-sentence.
#
# Schema layout:
#   parent_chunk_id  — self-FK on precedent_chunks. NULL for legacy
#                       rows (single-tier chunking) and for parent
#                       rows themselves. Cascade=SET NULL so deleting
#                       a parent doesn't orphan the children's payload.
#   chunk_role       — 'child' | 'parent'. Defaults to 'child' so any
#                       row created by the pre-V17 ingestion path is
#                       treated as a child without a parent (i.e. the
#                       parent-doc swap is a no-op and the legacy chunk
#                       continues to surface as-is).
#
# Activation is gated by ``config.PARENT_DOC_RETRIEVAL_ENABLED``. Even
# after the schema is in place, search keeps the legacy behaviour
# until both the chunker emits hierarchical chunks *and* the flag is
# flipped on — so this migration is safe to apply ahead of time.
# DDL summary: adds the two columns, then the role CHECK inside a DO block
# that swallows ``duplicate_object`` so re-running the script after the
# constraint already exists is a no-op. Both new columns are indexed.
SCHEMA_V17_SQL = """
ALTER TABLE precedent_chunks
    ADD COLUMN IF NOT EXISTS parent_chunk_id UUID
    REFERENCES precedent_chunks(id) ON DELETE SET NULL;

ALTER TABLE precedent_chunks
    ADD COLUMN IF NOT EXISTS chunk_role TEXT DEFAULT 'child';

DO $$ BEGIN
    ALTER TABLE precedent_chunks ADD CONSTRAINT precedent_chunks_role_check
        CHECK (chunk_role IN ('child', 'parent'));
EXCEPTION WHEN duplicate_object THEN NULL; END $$;

CREATE INDEX IF NOT EXISTS idx_precedent_chunks_parent
    ON precedent_chunks(parent_chunk_id);
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_role
    ON precedent_chunks(chunk_role);
"""


# ── V18: RAG telemetry — closed-loop retrieval feedback (TaskMaster #50)
#
# Captures every semantic search call (query, agent, top results,
# latency) so we can compute nDCG@10 over time and surface drift before
# it bites. Relevance signal comes from two places:
#   1. ``cited_in_decision`` — auto-inferred. If a precedent cited in a
#      final draft's ``decision_paragraphs.citations`` also appears in
#      the ``top_case_law_ids`` of a search log for the same case, that
#      hit is treated as highly relevant (score=3).
#   2. ``chair_marked`` — explicit feedback (future hook for the UI).
#
# ``top_case_law_ids`` is intentionally nullable: ``search_decisions``
# returns document chunks from active cases (not case_law rows), so its
# rows log the query but leave the array empty. nDCG aggregation skips
# those.
# DDL summary: search_logs keeps one row per search call; its case
# reference is SET NULL on case deletion, so the log row outlives the case.
# search_relevance_feedback holds graded (0-3) judgements per
# (search log, case_law row, feedback source) and is removed by CASCADE
# when either the log or the case_law row is deleted.
SCHEMA_V18_SQL = """
CREATE TABLE IF NOT EXISTS search_logs (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    search_type TEXT NOT NULL,
        -- 'precedent_library' / 'internal_decisions'
        -- / 'decisions' / 'case_documents' / 'similar_cases'
    query TEXT NOT NULL,
    practice_area TEXT,
    case_id UUID REFERENCES cases(id) ON DELETE SET NULL,
    user_agent TEXT,
        -- 'writer' / 'researcher' / 'analyst' / 'manual' / 'unknown'
    result_count INTEGER,
    top_case_law_ids UUID[],
        -- nullable: empty for search_decisions/search_case_documents
        -- which return document chunks not case_law rows
    duration_ms INTEGER,
    created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_search_logs_type ON search_logs(search_type);
CREATE INDEX IF NOT EXISTS idx_search_logs_case ON search_logs(case_id);
CREATE INDEX IF NOT EXISTS idx_search_logs_date ON search_logs(created_at DESC);

CREATE TABLE IF NOT EXISTS search_relevance_feedback (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    search_log_id UUID REFERENCES search_logs(id) ON DELETE CASCADE,
    case_law_id UUID NOT NULL REFERENCES case_law(id) ON DELETE CASCADE,
    rank INTEGER NOT NULL,
        -- 1-based position in the original results (1 = top hit)
    relevance_score INTEGER NOT NULL
        CHECK (relevance_score IN (0, 1, 2, 3)),
        -- 0=irrelevant, 1=marginal, 2=relevant, 3=highly relevant
    feedback_source TEXT,
        -- 'cited_in_decision' / 'chair_marked' / 'auto_inferred'
    created_at TIMESTAMPTZ DEFAULT NOW(),
    UNIQUE(search_log_id, case_law_id, feedback_source)
);
CREATE INDEX IF NOT EXISTS idx_relevance_log
    ON search_relevance_feedback(search_log_id);
CREATE INDEX IF NOT EXISTS idx_relevance_case_law
    ON search_relevance_feedback(case_law_id);
"""


# ── V19: case_law.citation_formatted ───────────────────────────────
# Full formal citation per the Israeli unified citation rules ("כללי
# הציטוט האחיד"). Stored as Markdown: parties wrapped in **…** so the
# copy-to-clipboard helper can render bold for Word/Docs while keeping
# the plain-text form readable.
#
# Example:
#   ערר (ועדות ערר - תכנון ובנייה ת"א-יפו) 81002-01-21 **אברהם אגסי
#   נ' הועדה המקומית לתכנון ובנייה תל אביב** (נבו 25.9.2025)
# DDL summary: a single TEXT column defaulting to ''; no index is created.
SCHEMA_V19_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS citation_formatted TEXT DEFAULT '';
"""


# ── V20: case-name / case-number lexical match ────────────────────
# RC-A fix: the V12 tsvectors cover only chunk *content* + halacha
# text, so a bare case-name query ("אגסי") matched decisions that
# *cite* the case rather than the case itself. case_name and
# case_number live on the parent case_law row, so we add a dedicated
# meta tsvector there and OR it into the lexical search — a name/number
# hit then surfaces all of that case's chunks + halachot. 'simple'
# config (no stemmer) preserves Hebrew names + alphanumeric case
# numbers like "81002-01-21" exactly as V12 does for content.
# DDL summary: case_law.meta_tsv is a stored generated tsvector over
# case_name + ' ' + case_number (NULLs coalesced to ''), GIN-indexed.
SCHEMA_V20_SQL = """
ALTER TABLE case_law
    ADD COLUMN IF NOT EXISTS meta_tsv tsvector
    GENERATED ALWAYS AS (
        to_tsvector('simple',
            coalesce(case_name,'') || ' ' || coalesce(case_number,'')
        )
    ) STORED;

CREATE INDEX IF NOT EXISTS idx_case_law_meta_tsv
    ON case_law USING GIN(meta_tsv);
"""


# ── V21: explicit `searchable` flag (GAP-13 / INV-DM1) ─────────────
# Materialized completeness flag — a case_law row is exposed to search only
# when it satisfies the completeness contract (02-data-model §2a). Recomputed
# on ingest/metadata completion via recompute_searchable(); not inferred at
# query time. Default false so a freshly-inserted row is excluded until proven
# complete. Health-check surfaces count(*) FILTER (WHERE NOT searchable).
# DDL summary: NOT NULL boolean defaulting to false, with a plain index on
# the flag. Rows that already exist when the column is added therefore
# start out as not searchable.
SCHEMA_V21_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS searchable boolean NOT NULL DEFAULT false;
CREATE INDEX IF NOT EXISTS idx_case_law_searchable ON case_law (searchable);
"""


# ── V22: cases.blocks_stale — DOCX↔blocks drift flag (GAP-17 / INV-EX1) ──
# Set true when revise_draft/apply_user_edit make active_draft_path the live
# source-of-truth without re-syncing decision_blocks; cleared when blocks are
# re-exported or re-saved. Surfaced by health-check. Source-of-truth remains
# decision_blocks — this only flags known drift (no fragile DOCX→blocks reparse).
# DDL summary: NOT NULL boolean on cases defaulting to false; not indexed.
SCHEMA_V22_SQL = """
ALTER TABLE cases ADD COLUMN IF NOT EXISTS blocks_stale boolean NOT NULL DEFAULT false;
"""


# ── V23: case_law content/indexed hashes — re-index on content change (GAP-09) ──
# content_hash = SHA-256 of current full_text (written at the create boundary).
# indexed_hash = the content_hash the CURRENT chunks/embeddings were built from
# (set by mark_indexed after a successful store). Stale ⇔ content_hash IS
# DISTINCT FROM indexed_hash. embedding can't be a GENERATED column (needs an
# API call), so freshness is enforced by detection + reindex_case_law + health-check.
# DDL summary: content_hash is NOT NULL with default ''; indexed_hash is
# nullable with no default, so it is NULL until something writes it.
SCHEMA_V23_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS content_hash text NOT NULL DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS indexed_hash text;
"""


# ── V24: citation corroboration (X11) ─────────────────────────────
# Adds a ``treatment`` text column (default '') to
# precedent_internal_citations and creates halacha_citation_corroboration,
# which ties a halacha to a citing case_law row and/or a citing decision.
# source_citation_id is a bare UUID — this DDL declares no foreign key for
# it — and (halacha_id, source_citation_id) is unique.
SCHEMA_V24_SQL = """
-- X11: citation corroboration (treatment + halacha-level link)
ALTER TABLE precedent_internal_citations
    ADD COLUMN IF NOT EXISTS treatment TEXT DEFAULT '';

CREATE TABLE IF NOT EXISTS halacha_citation_corroboration (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    halacha_id UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE,
    citing_case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    citing_decision_id UUID REFERENCES decisions(id) ON DELETE SET NULL,
    source_citation_id UUID NOT NULL,
    treatment TEXT NOT NULL,
    match_score NUMERIC(4,3) DEFAULT 0,
    match_context TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE (halacha_id, source_citation_id)
);
CREATE INDEX IF NOT EXISTS idx_hcc_halacha ON halacha_citation_corroboration(halacha_id);
"""

# ── V25: per-chunk halacha-extraction checkpoint ──────────────────
# Adds a nullable halacha_extracted_at timestamp to precedent_chunks; the
# resume semantics are described in the SQL comment inside the script.
SCHEMA_V25_SQL = """
-- Crash-safe halacha extraction: per-chunk checkpoint enables incremental store
-- + resume. A chunk with halacha_extracted_at set has been processed; a resumed
-- run skips it (so a crash never loses completed chunks or re-pays for them).
ALTER TABLE precedent_chunks
    ADD COLUMN IF NOT EXISTS halacha_extracted_at TIMESTAMPTZ;
"""

# ── V26: draft_final_pairs — draft vs. signed-final ledger ────────
# One row pairs a snapshot of the AI draft with the chair's final for a
# case (rows cascade away with the case). status is free text at the DB
# level — no CHECK constraint — starting at 'final_received'. Indexed by
# case and by status.
SCHEMA_V26_SQL = """
-- draft_final_pairs (T5 / INV-LRN4): the reconciliation ledger.
-- Every decision is "closed" only after it is compared against the chair's signed
-- final. Captures an immutable snapshot of the AI draft at mark-final time (before
-- it can be overwritten), paired with the final. The LLM distillation (curator)
-- fills final_text + diff_stats + analysis later and advances status.
CREATE TABLE IF NOT EXISTS draft_final_pairs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID NOT NULL REFERENCES cases(id) ON DELETE CASCADE,
    draft_text TEXT NOT NULL DEFAULT '',
    final_path TEXT DEFAULT '',
    final_text TEXT DEFAULT '',
    diff_stats JSONB DEFAULT NULL,
    analysis JSONB DEFAULT NULL,
    -- final_received → analyzed → lessons_folded
    status TEXT NOT NULL DEFAULT 'final_received',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_case ON draft_final_pairs(case_id);
CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_status ON draft_final_pairs(status);
"""

# ── V27: style_exemplars — paragraph-level style samples ──────────
# Standalone table (no foreign keys) of paragraphs with a vector(1024)
# embedding. Indexed by section and by (decision_number, source); this
# script creates no vector index on ``embedding``.
SCHEMA_V27_SQL = """
-- style_exemplars (T1-T3): block-level paragraphs from Dafna's OWN decisions
-- (style_corpus + internal_committee finals), embedded for retrieval as
-- style exemplars at write-time. Purpose-built so we DON'T fabricate synthetic
-- cases just to reuse decision_paragraphs. INV-LRN5: style material only — the
-- writer is told to adapt structure/voice, copy only boilerplate, never substance.
CREATE TABLE IF NOT EXISTS style_exemplars (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_number TEXT DEFAULT '',
    source TEXT DEFAULT '',                 -- style_corpus | internal_committee
    practice_area TEXT DEFAULT '',
    outcome TEXT DEFAULT '',                -- rejection | partial_acceptance | full_acceptance | ''
    section TEXT DEFAULT 'other',           -- background | claims | discussion | summary | other
    paragraph_text TEXT NOT NULL,
    word_count INTEGER DEFAULT 0,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_style_exemplars_section ON style_exemplars(section);
CREATE INDEX IF NOT EXISTS idx_style_exemplars_decision ON style_exemplars(decision_number, source);
"""


async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
    """Apply every schema script, oldest first, on a single pooled connection.

    The scripts are executed one after another in version order; a failure
    in any script propagates and stops the remaining ones from running.

    Args:
        pool: Connection pool to borrow the migration connection from.
    """
    # Order matters: later scripts alter tables created by earlier ones.
    ordered_scripts = (
        SCHEMA_SQL,
        MIGRATIONS_SQL,
        SCHEMA_V2_SQL,
        SCHEMA_V3_SQL,
        SCHEMA_V4_SQL,
        SCHEMA_V5_SQL,
        SCHEMA_V6_SQL,
        SCHEMA_V7_SQL,
        SCHEMA_V8_SQL,
        SCHEMA_V9_SQL,
        SCHEMA_V10_SQL,
        SCHEMA_V11_SQL,
        SCHEMA_V12_SQL,
        SCHEMA_V13_SQL,
        SCHEMA_V14_SQL,
        SCHEMA_V15_SQL,
        SCHEMA_V16_SQL,
        SCHEMA_V17_SQL,
        SCHEMA_V18_SQL,
        SCHEMA_V19_SQL,
        SCHEMA_V20_SQL,
        SCHEMA_V21_SQL,
        SCHEMA_V22_SQL,
        SCHEMA_V23_SQL,
        SCHEMA_V24_SQL,
        SCHEMA_V25_SQL,
        SCHEMA_V26_SQL,
        SCHEMA_V27_SQL,
    )
    async with pool.acquire() as conn:
        for script in ordered_scripts:
            await conn.execute(script)
    logger.info("Database schema initialized (v1-v27)")


async def init_schema() -> None:
    """Backward-compatible wrapper. Schema init now runs lazily inside get_pool().

    Awaits ``get_pool()`` purely for its side effect and discards the
    returned pool, so this function always returns ``None``.
    """
    await get_pool()


# ── Case CRUD ───────────────────────────────────────────────────────

async def create_case(
    case_number: str,
    title: str,
    appellants: list[str] | None = None,
    respondents: list[str] | None = None,
    subject: str = "",
    property_address: str = "",
    permit_number: str = "",
    committee_type: str = "ועדה מקומית",
    hearing_date: date | None = None,
    notes: str = "",
    expected_outcome: str = "",
    # Default "" — DB CHECK constraint accepts empty, the upstream tool
    # (cases.case_create) is responsible for deriving the domain value
    # from the case_number prefix before calling here.
    practice_area: str = "",
    appeal_subtype: str = "",
    proceeding_type: str = "ערר",
) -> dict:
    """Insert a new row into ``cases`` and return it as loaded by ``get_case``.

    The case number is stored in its canonical write-time form
    (``_canonical_case_number``); ``appellants`` and ``respondents`` are
    serialised to JSON text, with ``None`` stored as an empty list. The row
    id is generated here with ``uuid4()``.

    Returns:
        The freshly inserted case, re-read from the database.
    """
    pool = await get_pool()
    new_id = uuid4()
    insert_sql = """INSERT INTO cases (id, case_number, title, appellants, respondents,
               subject, property_address, permit_number, committee_type,
               hearing_date, notes, expected_outcome,
               practice_area, appeal_subtype, proceeding_type)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)"""
    # Positional order must match the column list in the INSERT above.
    params = (
        new_id,
        _canonical_case_number(case_number),
        title,
        json.dumps(appellants or []),
        json.dumps(respondents or []),
        subject,
        property_address,
        permit_number,
        committee_type,
        hearing_date,
        notes,
        expected_outcome,
        practice_area,
        appeal_subtype,
        proceeding_type,
    )
    async with pool.acquire() as conn:
        await conn.execute(insert_sql, *params)
    return await get_case(new_id)


async def get_case(case_id: UUID) -> dict | None:
    """Load one case by primary key.

    Returns:
        The case as a dict (via ``_row_to_case``), or ``None`` when no row
        has that id.
    """
    lookup_sql = "SELECT * FROM cases WHERE id = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(lookup_sql, case_id)
    return _row_to_case(record) if record is not None else None


async def set_active_draft_path(case_id: UUID, path: str | None) -> None:
    """Record which DOCX is the case's source-of-truth draft.

    Writes ``path`` (which may be ``None``) to ``cases.active_draft_path``
    and refreshes ``updated_at``.
    """
    update_sql = "UPDATE cases SET active_draft_path = $1, updated_at = now() WHERE id = $2"
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(update_sql, path, case_id)


async def get_active_draft_path(case_id: UUID) -> str | None:
    """Return the stored ``active_draft_path`` for a case.

    Returns:
        The column value (which may itself be ``None``), or ``None`` when
        the case does not exist.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            "SELECT active_draft_path FROM cases WHERE id = $1", case_id,
        )
    if not record:
        return None
    return record["active_draft_path"]


async def mark_blocks_stale(case_id: UUID, stale: bool) -> None:
    """Set or clear the DOCX↔blocks drift flag of a case (GAP-17).

    Writes ``stale`` to ``cases.blocks_stale`` and refreshes ``updated_at``.
    """
    flag_sql = "UPDATE cases SET blocks_stale = $1, updated_at = now() WHERE id = $2"
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(flag_sql, stale, case_id)


async def resolve_citation_case_law_ids(ids) -> dict:
    """Structural citation→corpus resolution (GAP-20 / INV-AUD3).

    Given case_law_id values referenced by a decision's citations/provenance,
    split into resolvable (exist in case_law) vs unresolvable.

    Args:
        ids: Iterable of candidate ids (``UUID`` objects or their string
            form). ``None`` is treated as an empty iterable.

    Returns:
        ``{"resolved": [...], "unresolved": [...]}``. Both lists hold the
        values exactly as passed in, in input order, duplicates included.
        Values that cannot be parsed as a UUID are reported as unresolved.

    Raises:
        Database errors from the lookup propagate to the caller: an outage
        must not be reported as "every citation is unresolved".
    """
    candidates = list(ids) if ids is not None else []

    # Validate locally so a malformed id is classified without a round trip
    # and cannot abort the batched lookup for the well-formed ones.
    parsed: list = []
    for cid in candidates:
        try:
            parsed.append(cid if isinstance(cid, UUID) else UUID(str(cid)))
        except ValueError:
            parsed.append(None)

    wanted = list({p for p in parsed if p is not None})
    found: set = set()
    if wanted:
        pool = await get_pool()
        async with pool.acquire() as conn:
            # One query for the whole batch instead of one EXISTS per id.
            rows = await conn.fetch(
                "SELECT id FROM case_law WHERE id = ANY($1::uuid[])", wanted)
        # Compare on the canonical string form so the driver's UUID type and
        # uuid.UUID never have to be compared directly.
        found = {str(r["id"]) for r in rows}

    resolved, unresolved = [], []
    for cid, p in zip(candidates, parsed):
        exists = p is not None and str(p) in found
        (resolved if exists else unresolved).append(cid)
    return {"resolved": resolved, "unresolved": unresolved}


def _normalize_case_number(s: str) -> str:
    """Canonicalise a case number for tolerant lookup.

    Agents receive the number in many shapes — from a Paperclip issue
    title ("ערר 8137/24"), with a slash instead of a dash, padded, or with
    surrounding whitespace. Stored values are bare ("8137-24"). Without
    this, get_case_by_number's exact match silently fails and the agent
    concludes the case has no documents (see #58). Strategy: drop any
    leading proceeding-type prefix (everything before the first digit),
    trim, and unify '/' → '-'.
    """
    s = (s or "").strip()
    m = re.search(r"\d", s)
    if m:
        s = s[m.start():]
    return s.strip().replace("/", "-")


def _canonical_case_number(s: str) -> str:
    """Canonical write-time form per X1 §1: trim · prefix-strip · '/'→'-'.

    Deterministic and format-only — does NOT add or remove a month segment.
    Used at the write boundary for identifier-keyed corpora (internal
    committee decisions, active cases). NOT for external precedents, whose
    canonical identifier is the full citation.
    """
    s = (s or "").strip()
    m = re.search(r"\d", s)
    if m:
        s = s[m.start():]
    return s.strip().replace("/", "-")


def _content_hash(text: str) -> str:
    """SHA-256 hex of the text — deterministic content fingerprint (FU-3/GAP-09).

    Empty/None → "" (a row with no text has no content fingerprint).
    """
    if not text:
        return ""
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


async def get_case_by_number(case_number: str) -> dict | None:
    """Find a case by its number, tolerating common formatting variants.

    A row matches when its stored ``case_number`` equals the input
    verbatim, or when the stored value — trimmed, with '/' replaced by '-' —
    equals the normalised input (``_normalize_case_number``). See #58.

    Returns:
        The best match as a dict, or ``None`` when nothing matches. Verbatim
        matches are preferred; ties are broken by oldest ``created_at``.
    """
    pool = await get_pool()
    normalised = _normalize_case_number(case_number)
    # $1 = input as given (exact match), $2 = normalised input. The ORDER BY
    # ranks exact matches ahead of normalised-only matches.
    lookup_sql = """SELECT * FROM cases
               WHERE case_number = $1
                  OR replace(btrim(case_number), '/', '-') = $2
               ORDER BY (case_number = $1) DESC, created_at
               LIMIT 1"""
    async with pool.acquire() as conn:
        record = await conn.fetchrow(lookup_sql, case_number, normalised)
    return None if record is None else _row_to_case(record)


async def list_cases(
    status: str | None = None,
    limit: int = 50,
    include_archived: bool = False,
    archived_only: bool = False,
) -> list[dict]:
    """List cases, most recently updated first.

    Args:
        status: When truthy, keep only cases with this status.
        limit: Maximum number of rows to return.
        include_archived: When true, archived cases are listed alongside
            active ones. Ignored if ``archived_only`` is set.
        archived_only: When true, list archived cases only.

    Returns:
        The matching cases as dicts (via ``_row_to_case``).
    """
    pool = await get_pool()
    conditions: list[str] = []
    params: list = []

    if status:
        params.append(status)
        conditions.append(f"status = ${len(params)}")

    # archived_only wins over include_archived; with neither set, archived
    # rows are hidden.
    if archived_only:
        archive_filter = "archived_at IS NOT NULL"
    elif include_archived:
        archive_filter = None
    else:
        archive_filter = "archived_at IS NULL"
    if archive_filter is not None:
        conditions.append(archive_filter)

    where_clause = f"WHERE {' AND '.join(conditions)}" if conditions else ""
    params.append(limit)
    sql = f"SELECT * FROM cases {where_clause} ORDER BY updated_at DESC LIMIT ${len(params)}"

    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *params)
    return [_row_to_case(record) for record in records]


async def update_case(case_id: UUID, **fields) -> dict | None:
    """Update the given columns of a case and return the refreshed case.

    Each keyword argument names a ``cases`` column and carries its new
    value. ``appellants``, ``respondents`` and ``tags`` are JSON-encoded
    before binding, and ``updated_at`` is always set to ``now()``. When no
    fields are given nothing is written and the current case is returned.

    Returns:
        Whatever ``get_case`` returns for ``case_id`` after the update.

    Raises:
        ValueError: if a field name is not a plain identifier. Column
            names are interpolated into the SQL text (only the values are
            bound as parameters), so they must never carry arbitrary SQL.
    """
    if not fields:
        return await get_case(case_id)
    # Validate before touching the database so a bad call has no side effects.
    invalid = [name for name in fields if not name.isidentifier()]
    if invalid:
        raise ValueError(f"invalid column name(s) for cases: {invalid!r}")
    pool = await get_pool()
    set_clauses = []
    values = []
    # $1 is reserved for the id in the WHERE clause, so columns start at $2.
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key in ("appellants", "respondents", "tags"):
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    set_clauses.append("updated_at = now()")
    sql = f"UPDATE cases SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, case_id, *values)
    return await get_case(case_id)


def _row_to_case(row: asyncpg.Record) -> dict:
    """Convert a ``cases`` row into a plain dict.

    ``appellants``, ``respondents`` and ``tags`` are JSON-decoded when they
    arrive as strings, and ``id`` is rendered as a string.
    """
    case = dict(row)
    json_columns = ("appellants", "respondents", "tags")
    for column in json_columns:
        raw = case.get(column)
        if isinstance(raw, str):
            case[column] = json.loads(raw)
    case["id"] = str(case["id"])
    return case


async def archive_case(case_id: UUID) -> dict | None:
    """Stamp ``archived_at`` (and ``updated_at``) on a case with ``now()``.

    Returns the updated case as a dict, or None if no case has this id.
    """
    statement = (
        "UPDATE cases SET archived_at = now(), updated_at = now() "
        "WHERE id = $1 RETURNING *"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        archived = await conn.fetchrow(statement, case_id)
    if not archived:
        return None
    return _row_to_case(archived)


async def restore_case(case_id: UUID) -> dict | None:
    """Un-archive a case by resetting ``archived_at`` to NULL.

    ``updated_at`` is bumped to ``now()`` as part of the same statement.
    Returns the updated case as a dict, or None if no case has this id.
    """
    statement = (
        "UPDATE cases SET archived_at = NULL, updated_at = now() "
        "WHERE id = $1 RETURNING *"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        restored = await conn.fetchrow(statement, case_id)
    if not restored:
        return None
    return _row_to_case(restored)


async def delete_case(case_id: UUID) -> bool:
    """Remove a case row; True when a row was actually deleted.

    Dependent rows are cleaned up by the FK constraints, not by this code:
      • CASCADE: documents, document_chunks, claims, appraiser_facts,
        decisions, qa_results, case_precedents
      • SET NULL: audit_log.case_id, chair_feedback.case_id

    NOTE: only the legal-ai database is touched. The Paperclip project
    (issues, comments, runs) and the Gitea repo for the case live in other
    systems and are NOT cleaned up here — callers that need a full reset
    must handle those separately.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute("DELETE FROM cases WHERE id = $1", case_id)
    # The status tag looks like "DELETE <n>"; the last token is the row count.
    removed = int(status_tag.split()[-1])
    return removed > 0


# ── Document CRUD ───────────────────────────────────────────────────

async def create_document(
    case_id: UUID,
    doc_type: str,
    title: str,
    file_path: str,
    page_count: int | None = None,
    content_hash: str = "",
) -> dict:
    """Insert a new document row for a case and return it as a dict.

    A fresh UUID is generated for the row. After the insert the row is
    read back on the same connection, so the result also carries any
    column values filled in by the database.
    """
    pool = await get_pool()
    new_id = uuid4()
    insert_sql = """INSERT INTO documents (id, case_id, doc_type, title, file_path, page_count, content_hash)
               VALUES ($1, $2, $3, $4, $5, $6, $7)"""
    async with pool.acquire() as conn:
        await conn.execute(
            insert_sql,
            new_id, case_id, doc_type, title, file_path, page_count, content_hash,
        )
        stored = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", new_id)
    return _row_to_doc(stored)


async def get_document_by_hash(case_id: UUID, content_hash: str) -> dict | None:
    """Find a document in this case that has the given file hash.

    INV-TOOL3 / GAP-52: the hash is the deterministic key that makes
    uploads idempotent. An empty hash (as on legacy rows) never matches —
    the lookup is skipped entirely and None is returned.
    """
    if content_hash:
        pool = await get_pool()
        async with pool.acquire() as conn:
            found = await conn.fetchrow(
                "SELECT * FROM documents WHERE case_id = $1 AND content_hash = $2 LIMIT 1",
                case_id, content_hash,
            )
        if found:
            return _row_to_doc(found)
    return None


async def update_document(doc_id: UUID, **fields) -> None:
    """Update the given columns of a document row.

    Each keyword argument names a ``documents`` column and carries its new
    value; ``metadata`` is JSON-encoded before binding. With no fields the
    call is a no-op and the database is not touched.

    Raises:
        ValueError: if a field name is not a plain identifier. Column
            names are interpolated into the SQL text (only the values are
            bound as parameters), so they must never carry arbitrary SQL.
    """
    if not fields:
        return
    # Validate before touching the database so a bad call has no side effects.
    invalid = [name for name in fields if not name.isidentifier()]
    if invalid:
        raise ValueError(f"invalid column name(s) for documents: {invalid!r}")
    pool = await get_pool()
    set_clauses = []
    values = []
    # $1 is reserved for the id in the WHERE clause, so columns start at $2.
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key == "metadata":
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    sql = f"UPDATE documents SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, doc_id, *values)


async def get_document(doc_id: UUID) -> dict | None:
    """Load a single document by id; None when it does not exist."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id)
    if not record:
        return None
    return _row_to_doc(record)


async def list_documents(case_id: UUID) -> list[dict]:
    """Return every document of a case, ordered by ``created_at``."""
    query = "SELECT * FROM documents WHERE case_id = $1 ORDER BY created_at"
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, case_id)
    return list(map(_row_to_doc, records))


async def get_document_text(doc_id: UUID) -> str:
    """Return the extracted text of a document, or "" when there is none.

    "" is returned both when the document does not exist and when its
    ``extracted_text`` column is NULL, so the result is always a ``str``
    as the signature promises.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "SELECT extracted_text FROM documents WHERE id = $1", doc_id
        )
    if not row:
        return ""
    # A NULL column would otherwise leak None through a ``-> str`` API.
    return row["extracted_text"] or ""


def _row_to_doc(row: asyncpg.Record) -> dict:
    """Convert a ``documents`` row into a plain dict.

    ``id`` and ``case_id`` are passed through ``str()`` and ``metadata`` is
    JSON-decoded when it arrives as a string.
    """
    doc = dict(row)
    for key in ("id", "case_id"):
        doc[key] = str(doc[key])
    meta = doc.get("metadata")
    if isinstance(meta, str):
        doc["metadata"] = json.loads(meta)
    return doc


# ── Claims ─────────────────────────────────────────────────────────

async def store_claims(case_id: UUID, claims: list[dict], source_document: str = "") -> int:
    """Store extracted claims, replacing existing claims from the same source.

    Each claim dict carries ``party_role`` and ``claim_text`` (required) and
    optionally ``party_name`` (default ""), ``claim_index`` (default 0) and
    ``claim_type`` (default "claim").

    When ``source_document`` is non-empty, the case's existing claims from
    that source are deleted first. The delete and all inserts run in one
    transaction, so a failure part-way through cannot leave the case with
    its old claims gone and only some of the new ones stored.

    Returns:
        The number of claims passed in.

    Raises:
        KeyError: if a claim lacks ``party_role`` or ``claim_text``. This is
            detected before the database is touched.
    """
    # Build (and thereby validate) every row up front, before any DB work.
    rows = [
        (
            case_id,
            claim["party_role"],
            claim.get("party_name", ""),
            claim["claim_text"],
            claim.get("claim_index", 0),
            source_document,
            claim.get("claim_type", "claim"),
        )
        for claim in claims
    ]
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            if source_document:
                await conn.execute(
                    "DELETE FROM claims WHERE case_id = $1 AND source_document = $2",
                    case_id, source_document,
                )
            if rows:
                await conn.executemany(
                    """INSERT INTO claims (case_id, party_role, party_name, claim_text, claim_index, source_document, claim_type)
                       VALUES ($1, $2, $3, $4, $5, $6, $7)""",
                    rows,
                )
    return len(claims)


async def get_claims(case_id: UUID, party_role: str | None = None) -> list[dict]:
    """Return the claims of a case as plain dicts.

    With a truthy ``party_role`` only that party's claims are returned,
    ordered by ``claim_index``; otherwise all claims are returned, ordered
    by ``party_role`` and then ``claim_index``.
    """
    pool = await get_pool()
    if party_role:
        query = "SELECT * FROM claims WHERE case_id = $1 AND party_role = $2 ORDER BY claim_index"
        params = (case_id, party_role)
    else:
        query = "SELECT * FROM claims WHERE case_id = $1 ORDER BY party_role, claim_index"
        params = (case_id,)
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *params)
    return list(map(dict, records))


# ── Decisions ──────────────────────────────────────────────────────

async def create_decision(
    case_id: UUID,
    outcome: str = "",
    outcome_summary: str = "",
    outcome_reasoning: str = "",
    direction_doc: dict | None = None,
) -> dict:
    """Insert a new decision version for a case and return it.

    The version number is one above the highest existing version for the
    case, or 1 when the case has no decision yet. A falsy ``direction_doc``
    is stored as NULL; otherwise it is JSON-encoded.
    """
    pool = await get_pool()
    new_id = uuid4()
    async with pool.acquire() as conn:
        # Look up the newest existing version (if any) to derive the next one.
        latest = await conn.fetchrow(
            "SELECT id, version FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1",
            case_id,
        )
        if latest:
            next_version = latest["version"] + 1
        else:
            next_version = 1

        if direction_doc:
            direction_json = json.dumps(direction_doc)
        else:
            direction_json = None

        await conn.execute(
            """INSERT INTO decisions (id, case_id, version, outcome, outcome_summary,
                                     outcome_reasoning, direction_doc)
               VALUES ($1, $2, $3, $4, $5, $6, $7)""",
            new_id, case_id, next_version, outcome, outcome_summary,
            outcome_reasoning, direction_json,
        )
    return await get_decision(new_id)


async def get_decision(decision_id: UUID) -> dict | None:
    """Load a decision by id as a plain dict; None when it does not exist.

    ``id`` and ``case_id`` are rendered as strings, and ``direction_doc`` /
    ``panel_members`` are JSON-decoded when they arrive as strings.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow("SELECT * FROM decisions WHERE id = $1", decision_id)
    if not record:
        return None
    decision = dict(record)
    for key in ("id", "case_id"):
        decision[key] = str(decision[key])
    for key in ("direction_doc", "panel_members"):
        raw = decision.get(key)
        if isinstance(raw, str):
            decision[key] = json.loads(raw)
    return decision


async def get_decision_by_case(case_id: UUID) -> dict | None:
    """Load the highest-version decision of a case; None when it has none.

    ``id`` and ``case_id`` are rendered as strings, and ``direction_doc`` /
    ``panel_members`` are JSON-decoded when they arrive as strings.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            "SELECT * FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1",
            case_id,
        )
    if not record:
        return None
    decision = dict(record)
    for key in ("id", "case_id"):
        decision[key] = str(decision[key])
    for key in ("direction_doc", "panel_members"):
        raw = decision.get(key)
        if isinstance(raw, str):
            decision[key] = json.loads(raw)
    return decision


async def get_critical_qa_failures(case_id: UUID) -> list[dict]:
    """List the failed critical-severity QA checks recorded for a case.

    ``qa_results`` is cleared and rewritten by every ``validate_decision``
    run, so the rows currently stored for a ``case_id`` ARE the latest run.
    Only rows with ``severity='critical' AND passed=false`` are returned,
    ordered by ``check_name``. An empty list does not tell "all passed"
    apart from "no QA run yet"; callers use ``qa_run_exists`` for that.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        failures = await conn.fetch(
            """SELECT check_name, severity, passed, errors
               FROM qa_results
               WHERE case_id = $1 AND severity = 'critical' AND passed = false
               ORDER BY check_name""",
            case_id,
        )
    return list(map(dict, failures))


async def qa_run_exists(case_id: UUID) -> bool:
    """Report whether any ``qa_results`` rows exist for this case."""
    count_sql = "SELECT count(*) FROM qa_results WHERE case_id = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        row_count = await conn.fetchval(count_sql, case_id)
    return True if row_count else False


async def update_decision(decision_id: UUID, **fields) -> None:
    """Update the given columns of a decision row.

    Each keyword argument names a ``decisions`` column and carries its new
    value. ``direction_doc`` and ``panel_members`` are JSON-encoded when
    given as a dict or list, and ``updated_at`` is always set to ``now()``.
    With no fields the call is a no-op and the database is not touched.

    Raises:
        ValueError: if a field name is not a plain identifier. Column
            names are interpolated into the SQL text (only the values are
            bound as parameters), so they must never carry arbitrary SQL.
    """
    if not fields:
        return
    # Validate before touching the database so a bad call has no side effects.
    invalid = [name for name in fields if not name.isidentifier()]
    if invalid:
        raise ValueError(f"invalid column name(s) for decisions: {invalid!r}")
    pool = await get_pool()
    set_clauses = []
    values = []
    # $1 is reserved for the id in the WHERE clause, so columns start at $2.
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key in ("direction_doc", "panel_members") and isinstance(val, (dict, list)):
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    set_clauses.append("updated_at = now()")
    sql = f"UPDATE decisions SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, decision_id, *values)


# ── Document deletion ──────────────────────────────────────────────

async def delete_document(doc_id: UUID) -> bool:
    """Remove a document together with its chunks, in one transaction.

    Returns True when a ``documents`` row was actually deleted.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            # Chunks go first, then the document row itself.
            await conn.execute(
                "DELETE FROM document_chunks WHERE document_id = $1", doc_id
            )
            status_tag = await conn.execute(
                "DELETE FROM documents WHERE id = $1", doc_id
            )
            removed = int(status_tag.split()[-1])
            return removed > 0


# ── Chunks & Vectors ───────────────────────────────────────────────

async def delete_document_chunks(document_id: UUID) -> int:
    """Remove every chunk of a document (used before reprocessing).

    Returns the number of chunk rows deleted.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute(
            "DELETE FROM document_chunks WHERE document_id = $1", document_id
        )
        # Status tag is e.g. "DELETE 5"; the trailing token is the count.
        count_token = status_tag.split()[-1]
        return int(count_token)


async def store_chunks(
    document_id: UUID,
    case_id: UUID | None,
    chunks: list[dict],
) -> int:
    """Replace a document's chunks with the given ones (with embeddings).

    Each chunk dict carries ``chunk_index``, ``content`` and ``embedding``
    (required) and optionally ``section_type`` (default "other") and
    ``page_number`` (default None).

    The existing chunks are deleted and the new ones inserted in a single
    transaction, so a failure part-way through cannot leave the document
    with its old chunks gone and only some of the new ones stored.

    Returns:
        The number of chunks passed in.

    Raises:
        KeyError: if a chunk lacks a required key. This is detected before
            the database is touched.
    """
    # Build (and thereby validate) every row up front, before any DB work.
    rows = [
        (
            document_id,
            case_id,
            chunk["chunk_index"],
            chunk["content"],
            chunk.get("section_type", "other"),
            chunk["embedding"],
            chunk.get("page_number"),
        )
        for chunk in chunks
    ]
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            # Delete existing chunks for this document
            await conn.execute(
                "DELETE FROM document_chunks WHERE document_id = $1", document_id
            )
            if rows:
                await conn.executemany(
                    """INSERT INTO document_chunks
                       (document_id, case_id, chunk_index, content, section_type, embedding, page_number)
                       VALUES ($1, $2, $3, $4, $5, $6, $7)""",
                    rows,
                )
    return len(chunks)


async def search_similar(
    query_embedding: list[float],
    limit: int = 10,
    case_id: UUID | None = None,
    section_type: str | None = None,
    practice_area: str | None = None,
    appeal_subtype: str | None = None,
) -> list[dict]:
    """Find the document chunks closest to ``query_embedding``.

    Results are ordered by the ``<=>`` distance to the query embedding and
    each row carries ``score = 1 - distance``, along with the chunk's
    content, section type, page number, document id/title and case
    id/number. Every truthy optional argument adds an equality filter
    (chunk-level for ``case_id`` / ``section_type``, case-level for
    ``practice_area`` / ``appeal_subtype``). At most ``limit`` rows are
    returned.

    NOTE(review): chunks are inner-joined to ``cases``, so chunks stored
    with a NULL ``case_id`` can never be returned here.
    """
    pool = await get_pool()
    # $1 and $2 are fixed; optional filters take $3 onwards.
    params: list = [query_embedding, limit]
    conditions: list[str] = []
    optional_filters = (
        ("dc.case_id", case_id),
        ("dc.section_type", section_type),
        ("c.practice_area", practice_area),
        ("c.appeal_subtype", appeal_subtype),
    )
    for column, wanted in optional_filters:
        if wanted:
            params.append(wanted)
            conditions.append(f"{column} = ${len(params)}")

    where_sql = ("WHERE " + " AND ".join(conditions)) if conditions else ""

    sql = f"""
        SELECT dc.content, dc.section_type, dc.page_number,
               dc.document_id, dc.case_id,
               d.title AS document_title,
               c.case_number,
               1 - (dc.embedding <=> $1) AS score
        FROM document_chunks dc
        JOIN documents d ON d.id = dc.document_id
        JOIN cases c ON c.id = dc.case_id
        {where_sql}
        ORDER BY dc.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        hits = await conn.fetch(sql, *params)
    return list(map(dict, hits))


# ── Style corpus ────────────────────────────────────────────────────

async def add_to_style_corpus(
    document_id: UUID | None,
    decision_number: str,
    decision_date: date | None,
    subject_categories: list[str],
    full_text: str,
    summary: str = "",
    outcome: str = "",
    key_principles: list[str] | None = None,
    practice_area: str = "appeals_committee",
    appeal_subtype: str = "",
) -> UUID:
    """Insert a decision into ``style_corpus`` and return the new row's id.

    ``subject_categories`` and ``key_principles`` are stored JSON-encoded;
    a missing or empty ``key_principles`` is stored as an empty JSON list.
    """
    pool = await get_pool()
    new_id = uuid4()
    categories_json = json.dumps(subject_categories)
    principles_json = json.dumps(key_principles or [])
    async with pool.acquire() as conn:
        await conn.execute(
            """INSERT INTO style_corpus
               (id, document_id, decision_number, decision_date,
                subject_categories, full_text, summary, outcome, key_principles,
                practice_area, appeal_subtype)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
            new_id, document_id, decision_number, decision_date,
            categories_json, full_text, summary, outcome,
            principles_json,
            practice_area, appeal_subtype,
        )
    return new_id


async def delete_from_style_corpus(corpus_id: UUID) -> dict:
    """Remove a decision from style_corpus + related documents (cascades chunks).

    When the corpus row has no ``document_id`` (the current training
    pipeline inserts style_corpus with document_id=NULL), the corpus-tagged
    document is looked up by title instead: a case-less document whose
    title contains the decision number. It is deleted only when exactly
    one document matches.

    The decision number is matched literally: LIKE metacharacters in it
    (``%``, ``_``, ``\\``) are escaped so they cannot widen or shift the
    match of this destructive lookup.

    Returns:
        ``{"deleted": False, "reason": "not found"}`` when no corpus row has
        this id; otherwise ``{"deleted": True, "decision_number": ...,
        "docs_deleted": 0 or 1}``.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            row = await conn.fetchrow(
                "DELETE FROM style_corpus WHERE id = $1 "
                "RETURNING decision_number, document_id",
                corpus_id,
            )
            if not row:
                return {"deleted": False, "reason": "not found"}

            docs_deleted = 0
            if row["document_id"]:
                await conn.execute(
                    "DELETE FROM documents WHERE id = $1", row["document_id"]
                )
                docs_deleted = 1
            elif row["decision_number"]:
                # Best-effort: match a corpus document by the decision_number
                # in its title. Only for single, unambiguous matches.
                # Backslash is LIKE's default escape character in PostgreSQL,
                # so escape it first, then the two wildcards.
                needle = (
                    row["decision_number"]
                    .replace("\\", "\\\\")
                    .replace("%", "\\%")
                    .replace("_", "\\_")
                )
                docs = await conn.fetch(
                    "SELECT id FROM documents "
                    "WHERE case_id IS NULL AND title LIKE $1",
                    f"%{needle}%",
                )
                if len(docs) == 1:
                    await conn.execute(
                        "DELETE FROM documents WHERE id = $1", docs[0]["id"]
                    )
                    docs_deleted = 1

    return {
        "deleted": True,
        "decision_number": row["decision_number"],
        "docs_deleted": docs_deleted,
    }


async def get_style_corpus_row(corpus_id: UUID) -> dict | None:
    """Load one ``style_corpus`` row by id as a dict; None when it is missing."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            """
            SELECT id, document_id, decision_number, decision_date,
                   subject_categories, full_text, summary, outcome,
                   key_principles, practice_area, appeal_subtype, created_at
            FROM style_corpus WHERE id = $1
            """,
            corpus_id,
        )
    if not record:
        return None
    return dict(record)


async def update_style_corpus_metadata(
    corpus_id: UUID,
    *,
    summary: str | None = None,
    outcome: str | None = None,
    key_principles: list[str] | None = None,
    appeal_subtype: str | None = None,
    practice_area: str | None = None,
    overwrite: bool = False,
) -> dict:
    """Fill in (or replace) the enriched-metadata columns of a corpus row.

    Arguments left as None are ignored. A supplied value is written only
    when the stored column is empty (NULL / blank text, or an empty
    ``key_principles`` list) unless ``overwrite=True``, which is the
    caller's explicit request to replace existing values (used by the
    re-extract flow when the chair runs it manually).

    Returns ``{"updated": True, "fields": [...]}`` listing the columns
    written, or ``{"updated": False, "reason": ...}`` when the row is
    missing or there was nothing to write.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        stored_row = await conn.fetchrow(
            "SELECT summary, outcome, key_principles, appeal_subtype, practice_area "
            "FROM style_corpus WHERE id = $1",
            corpus_id,
        )
        if not stored_row:
            return {"updated": False, "reason": "not found"}

        def text_writable(column: str) -> bool:
            # Writable when forced, or when the stored text is NULL / blank.
            return overwrite or not (stored_row[column] or "").strip()

        changes: dict = {}
        if summary is not None and text_writable("summary"):
            changes["summary"] = summary
        if outcome is not None and text_writable("outcome"):
            changes["outcome"] = outcome
        if key_principles is not None:
            stored_principles = stored_row["key_principles"]
            if isinstance(stored_principles, str):
                # The column may come back as JSON text; unreadable JSON
                # counts as "no principles stored".
                try:
                    stored_principles = json.loads(stored_principles)
                except json.JSONDecodeError:
                    stored_principles = []
            if overwrite or not (stored_principles or []):
                changes["key_principles"] = json.dumps(key_principles)
        if appeal_subtype is not None and text_writable("appeal_subtype"):
            changes["appeal_subtype"] = appeal_subtype
        if practice_area is not None and text_writable("practice_area"):
            changes["practice_area"] = practice_area

        if not changes:
            return {"updated": False, "reason": "nothing to update", "fields": []}

        columns = list(changes)
        # $1 is the row id, so the column placeholders start at $2.
        assignments = [
            f"{column} = ${position}"
            for position, column in enumerate(columns, start=2)
        ]
        await conn.execute(
            f"UPDATE style_corpus SET {', '.join(assignments)} WHERE id = $1",
            corpus_id, *changes.values(),
        )
        return {"updated": True, "fields": columns}


# ── decision_lessons (per-corpus row notes) ────────────────────────


async def list_decision_lessons(corpus_id: UUID) -> list[dict]:
    """Return the lessons attached to one style_corpus row, newest first."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        lessons = await conn.fetch(
            "SELECT id, style_corpus_id, lesson_text, category, source, "
            "       applied_to_skill, created_by, created_at, updated_at "
            "FROM decision_lessons WHERE style_corpus_id = $1 "
            "ORDER BY created_at DESC",
            corpus_id,
        )
    return list(map(dict, lessons))


async def add_decision_lesson(
    corpus_id: UUID,
    *,
    lesson_text: str,
    category: str = "general",
    source: str = "manual",
    created_by: str = "chaim",
) -> dict:
    """Attach a new lesson to a style_corpus row and return the stored row.

    The result is the inserted row as a dict (including the generated id
    and timestamps), or an empty dict if the insert returned no row.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        inserted = await conn.fetchrow(
            "INSERT INTO decision_lessons "
            "(style_corpus_id, lesson_text, category, source, created_by) "
            "VALUES ($1, $2, $3, $4, $5) "
            "RETURNING id, style_corpus_id, lesson_text, category, source, "
            "          applied_to_skill, created_by, created_at, updated_at",
            corpus_id, lesson_text, category, source, created_by,
        )
    if not inserted:
        return {}
    return dict(inserted)


async def update_decision_lesson(
    lesson_id: UUID,
    *,
    lesson_text: str | None = None,
    category: str | None = None,
    applied_to_skill: bool | None = None,
) -> dict:
    """Patch a lesson's text, category and/or applied flag.

    Arguments left as None are not touched. Whenever at least one column
    is written, ``updated_at`` is set to ``now()`` as well.

    Returns:
        ``{"updated": True, ...row}`` with the updated row's columns, or
        ``{"updated": False, "reason": ...}`` when nothing was supplied
        (the database is not touched) or no lesson has this id.
    """
    candidates = {
        "lesson_text": lesson_text,
        "category": category,
        "applied_to_skill": applied_to_skill,
    }
    sets = {col: val for col, val in candidates.items() if val is not None}
    if not sets:
        return {"updated": False, "reason": "nothing to update"}
    # $1 is the lesson id, so the column placeholders start at $2.
    # updated_at is a SQL expression, not a bound value, so it is appended
    # to the clause directly.
    set_clause = ", ".join(f"{col} = ${i}" for i, col in enumerate(sets, start=2))
    set_clause += ", updated_at = now()"
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            f"UPDATE decision_lessons SET {set_clause} WHERE id = $1 "
            "RETURNING id, style_corpus_id, lesson_text, category, source, "
            "          applied_to_skill, updated_at",
            lesson_id, *sets.values(),
        )
    if not row:
        return {"updated": False, "reason": "not found"}
    return {"updated": True, **dict(row)}


async def delete_decision_lesson(lesson_id: UUID) -> dict:
    """Delete a lesson by id; ``{"deleted": True}`` when a row was removed."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute(
            "DELETE FROM decision_lessons WHERE id = $1", lesson_id,
        )
    # The status tag has the form "DELETE n"; a tag without a space is
    # treated as zero rows.
    _verb, separator, remainder = status_tag.partition(" ")
    affected = remainder.strip() if separator else "0"
    return {"deleted": affected != "0"}


async def count_decision_lessons_per_corpus() -> dict[str, int]:
    """Count lessons per style_corpus row, for badge display in the list.

    Keys are ``style_corpus_id`` values rendered as strings; corpus rows
    without any lesson do not appear in the result.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        grouped = await conn.fetch(
            "SELECT style_corpus_id, count(*) AS n "
            "FROM decision_lessons GROUP BY style_corpus_id"
        )
    counts: dict[str, int] = {}
    for record in grouped:
        counts[str(record["style_corpus_id"])] = record["n"]
    return counts


# ── chat (style agent conversations) ───────────────────────────────


async def create_chat_conversation(
    *,
    title: str = "שיחה חדשה",
    style_corpus_id: UUID | None = None,
    system_prompt_version: str = "v1",
) -> dict:
    """Create a chat conversation and return the stored row as a dict.

    The conversation may optionally be linked to a style_corpus row. An
    empty dict is returned if the insert yields no row.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        created = await conn.fetchrow(
            "INSERT INTO chat_conversations "
            "(title, style_corpus_id, system_prompt_version) "
            "VALUES ($1, $2, $3) "
            "RETURNING id, title, style_corpus_id, claude_session_id, "
            "          system_prompt_version, created_at, last_message_at",
            title, style_corpus_id, system_prompt_version,
        )
    if not created:
        return {}
    return dict(created)


async def list_chat_conversations(limit: int = 50) -> list[dict]:
    """List chat conversations, most recently active first.

    Each row also carries the linked corpus row's ``decision_number`` (NULL
    when the conversation has no linked corpus row) and a ``message_count``.
    Conversations with a NULL ``last_message_at`` sort last. At most
    ``limit`` rows are returned.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        conversations = await conn.fetch(
            """
            SELECT c.id, c.title, c.style_corpus_id, c.claude_session_id,
                   c.created_at, c.last_message_at,
                   sc.decision_number,
                   (SELECT count(*) FROM chat_messages m WHERE m.conversation_id = c.id) AS message_count
            FROM chat_conversations c
            LEFT JOIN style_corpus sc ON sc.id = c.style_corpus_id
            ORDER BY c.last_message_at DESC NULLS LAST
            LIMIT $1
            """,
            limit,
        )
    return list(map(dict, conversations))


async def get_chat_conversation(conv_id: UUID) -> dict | None:
    """Load one chat conversation by id as a dict; None when it is missing."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            "SELECT id, title, style_corpus_id, claude_session_id, "
            "       system_prompt_version, created_at, last_message_at "
            "FROM chat_conversations WHERE id = $1",
            conv_id,
        )
    if not record:
        return None
    return dict(record)


async def delete_chat_conversation(conv_id: UUID) -> dict:
    """Delete a chat conversation by id; ``{"deleted": True}`` when a row was removed."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute(
            "DELETE FROM chat_conversations WHERE id = $1", conv_id,
        )
    # The status tag has the form "DELETE n"; a tag without a space is
    # treated as zero rows.
    _verb, separator, remainder = status_tag.partition(" ")
    affected = remainder.strip() if separator else "0"
    return {"deleted": affected != "0"}


async def update_chat_conversation_session_id(
    conv_id: UUID, claude_session_id: str,
) -> None:
    """Record the Claude session id on a conversation.

    ``last_message_at`` is bumped to ``now()`` in the same statement.
    """
    statement = (
        "UPDATE chat_conversations SET claude_session_id = $1, "
        "       last_message_at = now() "
        "WHERE id = $2"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(statement, claude_session_id, conv_id)


async def add_chat_message(
    conv_id: UUID,
    *,
    role: str,
    content: str,
    raw_events: list | None = None,
) -> dict:
    """Append a message to a conversation and return the stored message.

    ``raw_events`` is stored JSON-encoded (an empty list when omitted or
    empty). After the insert the conversation's ``last_message_at`` is set
    to ``now()``. An empty dict is returned if the insert yields no row.
    """
    events_json = json.dumps(raw_events or [])
    pool = await get_pool()
    async with pool.acquire() as conn:
        message = await conn.fetchrow(
            "INSERT INTO chat_messages "
            "(conversation_id, role, content, raw_events) "
            "VALUES ($1, $2, $3, $4) "
            "RETURNING id, conversation_id, role, content, created_at",
            conv_id, role, content, events_json,
        )
        # Keep the conversation's activity timestamp in step with its messages.
        await conn.execute(
            "UPDATE chat_conversations SET last_message_at = now() WHERE id = $1",
            conv_id,
        )
    if not message:
        return {}
    return dict(message)


async def list_chat_messages(conv_id: UUID) -> list[dict]:
    """Return a conversation's messages in chronological order.

    Only ``id``, ``role``, ``content`` and ``created_at`` are selected.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        messages = await conn.fetch(
            "SELECT id, role, content, created_at "
            "FROM chat_messages WHERE conversation_id = $1 "
            "ORDER BY created_at ASC",
            conv_id,
        )
    return list(map(dict, messages))


async def get_style_patterns(pattern_type: str | None = None) -> list[dict]:
    """Return style patterns as dicts, most frequent first.

    With a truthy ``pattern_type`` only that type is returned; otherwise
    all patterns are returned, grouped by ``pattern_type``.
    """
    pool = await get_pool()
    if pattern_type:
        query = "SELECT * FROM style_patterns WHERE pattern_type = $1 ORDER BY frequency DESC"
        params = (pattern_type,)
    else:
        query = "SELECT * FROM style_patterns ORDER BY pattern_type, frequency DESC"
        params = ()
    async with pool.acquire() as conn:
        patterns = await conn.fetch(query, *params)
    return list(map(dict, patterns))


async def get_methodology_overrides(category: str) -> dict:
    """Load the chair's /methodology edits for one rule category.

    Categories are golden_ratios / discussion_rules / content_checklists.
    Only ``_global`` rows of ``appeal_type_rules`` are read. The result
    maps ``rule_key`` to its value, JSON-decoded when the stored value is a
    string holding valid JSON and left as-is otherwise.

    These values OVERRIDE the hardcoded lessons.py defaults — the writer
    must consume them (T15 / INV-LRN4). Mirrors the merge in
    GET /api/methodology/{category}.
    """

    def decode(raw):
        # Non-string values pass through untouched; strings that are not
        # valid JSON are kept verbatim.
        if not isinstance(raw, str):
            return raw
        try:
            return json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return raw

    pool = await get_pool()
    async with pool.acquire() as conn:
        rules = await conn.fetch(
            "SELECT rule_key, rule_value FROM appeal_type_rules "
            "WHERE appeal_type = '_global' AND rule_category = $1",
            category,
        )
    return {rule["rule_key"]: decode(rule["rule_value"]) for rule in rules}


async def get_recent_decision_lessons(limit: int = 15, practice_area: str = "") -> list[dict]:
    """Return the most recent per-decision lessons, newest first (T15).

    These are the learnings the chair/curator attached in /training
    (decision_lessons), exposed so the writer consumes them too. Each row
    carries the lesson text, category and source plus the owning corpus
    row's decision number and practice area. A non-empty ``practice_area``
    restricts the result to that area; at most ``limit`` rows are
    returned. Ordering is by ``created_at`` only.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        lessons = await conn.fetch(
            """SELECT dl.lesson_text, dl.category, dl.source,
                      sc.decision_number, sc.practice_area
               FROM decision_lessons dl
               JOIN style_corpus sc ON sc.id = dl.style_corpus_id
               WHERE ($2 = '' OR sc.practice_area = $2)
               ORDER BY dl.created_at DESC
               LIMIT $1""",
            limit, practice_area,
        )
    return list(map(dict, lessons))


async def create_draft_final_pair(case_id: UUID, draft_text: str, final_path: str = "") -> str:
    """Record the draft↔final pairing at mark-final (T5 / INV-LRN4).

    Stores the draft snapshot and the final's path with status
    ``final_received``; final_text/diff_stats/analysis are filled in later
    by the curator distillation. Returns the new row's id as a string.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        inserted = await conn.fetchrow(
            """INSERT INTO draft_final_pairs (case_id, draft_text, final_path, status)
               VALUES ($1, $2, $3, 'final_received') RETURNING id""",
            case_id, draft_text, final_path,
        )
    pair_id = inserted["id"]
    return str(pair_id)


async def update_draft_final_pair(
    pair_id: UUID,
    final_text: str | None = None,
    diff_stats: dict | None = None,
    analysis: dict | None = None,
    status: str | None = None,
) -> None:
    """Advance a pairing row (curator distillation).

    Only the arguments that are not None are written; ``diff_stats`` and
    ``analysis`` are stored as JSONB. ``updated_at`` is set to ``now()``
    whenever anything is written. With nothing to write the call returns
    without touching the database.
    """
    assignments: list[str] = []
    params: list = []

    def bind(column: str, value, cast: str = "") -> None:
        # Placeholders are numbered by position in ``params``.
        params.append(value)
        assignments.append(f"{column} = ${len(params)}{cast}")

    if final_text is not None:
        bind("final_text", final_text)
    if diff_stats is not None:
        bind("diff_stats", json.dumps(diff_stats, ensure_ascii=False), "::jsonb")
    if analysis is not None:
        bind("analysis", json.dumps(analysis, ensure_ascii=False), "::jsonb")
    if status is not None:
        bind("status", status)
    if not assignments:
        return

    assignments.append("updated_at = now()")
    # The row id is bound last, after all column values.
    params.append(pair_id)
    statement = f"UPDATE draft_final_pairs SET {', '.join(assignments)} WHERE id = ${len(params)}"
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(statement, *params)


async def list_draft_final_pairs(status: str | None = None, limit: int = 200) -> list[dict]:
    """Reconciliation ledger: draft/final pairings joined to their case, newest first.

    Args:
        status: When truthy, only pairings with exactly this status are returned.
        limit: Maximum number of rows.

    Returns:
        One dict per pairing with id, case_id, case_number, title, status,
        diff_stats, created_at and updated_at. case_number/title come from a
        LEFT JOIN on ``cases`` and are NULL when no case row matches.
    """
    if status:
        sql = """SELECT p.id, p.case_id, c.case_number, c.title, p.status,
                          p.diff_stats, p.created_at, p.updated_at
                   FROM draft_final_pairs p LEFT JOIN cases c ON c.id = p.case_id
                   WHERE p.status = $1 ORDER BY p.created_at DESC LIMIT $2"""
        args: tuple = (status, limit)
    else:
        sql = """SELECT p.id, p.case_id, c.case_number, c.title, p.status,
                          p.diff_stats, p.created_at, p.updated_at
                   FROM draft_final_pairs p LEFT JOIN cases c ON c.id = p.case_id
                   ORDER BY p.created_at DESC LIMIT $1"""
        args = (limit,)
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return [dict(record) for record in records]


async def insert_style_exemplar(
    decision_number: str, source: str, practice_area: str, outcome: str,
    section: str, paragraph_text: str, word_count: int, embedding: list[float],
) -> None:
    """Insert one block-level style exemplar row into ``style_exemplars`` (T1 backfill).

    All eight arguments map one-to-one, in order, onto the columns named in the
    INSERT statement.
    """
    insert_sql = """INSERT INTO style_exemplars
                   (decision_number, source, practice_area, outcome, section,
                    paragraph_text, word_count, embedding)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8)"""
    values = (
        decision_number, source, practice_area, outcome,
        section, paragraph_text, word_count, embedding,
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(insert_sql, *values)


async def delete_style_exemplars(decision_number: str, source: str) -> int:
    """Idempotent backfill helper: remove a decision's exemplars before re-inserting.

    Returns:
        The row count parsed from the last token of the status string returned
        by ``conn.execute``; 0 when that string is empty or its last token is
        not an integer.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status_tag = await conn.execute(
            "DELETE FROM style_exemplars WHERE decision_number = $1 AND source = $2",
            decision_number, source,
        )
    tokens = status_tag.split()
    if not tokens:
        return 0
    try:
        return int(tokens[-1])
    except ValueError:
        return 0


async def search_style_exemplars(
    query_embedding: list[float],
    section: str | None = None,
    outcome: str | None = None,
    practice_area: str | None = None,
    limit: int = 6,
) -> list[dict]:
    """Retrieve Dafna's own block-level paragraphs as STYLE exemplars (T2).

    Rows are ordered by ``embedding <=> $1`` and capped at ``limit``; ``score``
    is ``1 - distance``.

    Filters (each applied only when the argument is truthy):
        section: exact match — the hard filter.
        outcome / practice_area: match the given value OR rows whose column is
            the empty string, so untagged exemplars remain eligible.
    """
    pool = await get_pool()
    # (condition template, value); $1/$2 are reserved for embedding and limit.
    optional_filters = (
        ("section = ${}", section),
        ("(outcome = ${} OR outcome = '')", outcome),
        ("(practice_area = ${} OR practice_area = '')", practice_area),
    )
    args: list = [query_embedding, limit]
    clauses: list[str] = []
    for template, value in optional_filters:
        if value:
            args.append(value)
            clauses.append(template.format(len(args)))
    where_clause = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    sql = f"""
        SELECT decision_number, source, section, outcome, practice_area,
               paragraph_text, word_count,
               1 - (embedding <=> $1) AS score
        FROM style_exemplars
        {where_clause}
        ORDER BY embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return [dict(record) for record in records]


async def count_style_exemplars() -> dict:
    """Coverage check for the backfill: exemplar totals overall and per section.

    Returns:
        ``{"total": <row count>, "decisions": <distinct decision_number count>,
        "by_section": [{"section": ..., "n": ...}, ...]}`` with sections ordered
        by descending count.
    """
    total_sql = "SELECT count(*) FROM style_exemplars"
    per_section_sql = (
        "SELECT section, count(*) AS n FROM style_exemplars GROUP BY section ORDER BY n DESC"
    )
    decisions_sql = "SELECT count(DISTINCT decision_number) FROM style_exemplars"
    pool = await get_pool()
    async with pool.acquire() as conn:
        total = await conn.fetchval(total_sql)
        section_rows = await conn.fetch(per_section_sql)
        decisions = await conn.fetchval(decisions_sql)
    summary = {"total": total, "decisions": decisions}
    summary["by_section"] = [dict(row) for row in section_rows]
    return summary


async def upsert_style_pattern(
    pattern_type: str,
    pattern_text: str,
    context: str = "",
    examples: list[str] | None = None,
    appeal_subtype: str = "",
) -> None:
    """Insert a style pattern, or bump ``frequency`` when it already exists.

    A pattern is identified by (pattern_type, pattern_text, appeal_subtype).
    For an existing pattern only ``frequency`` is incremented; ``context`` and
    ``examples`` are written on first insert only.
    """
    lookup_sql = (
        "SELECT id, frequency FROM style_patterns "
        "WHERE pattern_type = $1 AND pattern_text = $2 AND appeal_subtype = $3"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        # NOTE(review): lookup and write are separate statements with no
        # transaction/lock — concurrent callers could both insert; confirm
        # whether a unique constraint guards this.
        match = await conn.fetchrow(lookup_sql, pattern_type, pattern_text, appeal_subtype)
        if not match:
            await conn.execute(
                """INSERT INTO style_patterns (pattern_type, pattern_text, context, examples, appeal_subtype)
                   VALUES ($1, $2, $3, $4, $5)""",
                pattern_type, pattern_text, context,
                json.dumps(examples or []),
                appeal_subtype,
            )
            return
        await conn.execute(
            "UPDATE style_patterns SET frequency = frequency + 1 WHERE id = $1",
            match["id"],
        )


async def clear_style_patterns(appeal_subtype: str = "") -> None:
    """Delete style patterns, optionally restricted to one ``appeal_subtype``.

    An empty ``appeal_subtype`` (the default) deletes ALL rows of
    ``style_patterns``.
    """
    if appeal_subtype:
        sql = "DELETE FROM style_patterns WHERE appeal_subtype = $1"
        args: tuple = (appeal_subtype,)
    else:
        sql = "DELETE FROM style_patterns"
        args = ()
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(sql, *args)


# ── Semantic Search (V2 — decision blocks & case law) ─────────────

async def search_similar_paragraphs(
    query_embedding: list[float],
    limit: int = 10,
    block_type: str | None = None,
) -> list[dict]:
    """Search decision paragraphs by semantic similarity.

    Args:
        query_embedding: Vector compared against ``paragraph_embeddings.embedding``.
        limit: Maximum number of rows.
        block_type: When truthy, restricts results to rows whose
            ``decision_blocks.block_id`` equals this value.

    Returns:
        Dicts with the paragraph content, its block and case metadata, the
        decision outcome/author, and ``score`` (``1 - distance``), closest first.
    """
    pool = await get_pool()
    args: list = [query_embedding, limit]
    where_clause = ""
    if block_type:
        args.append(block_type)
        where_clause = f"WHERE db.block_id = ${len(args)}"

    sql = f"""
        SELECT dp.content, dp.word_count, dp.paragraph_number,
               db.block_id AS block_type, db.title AS block_title,
               c.case_number, c.title AS case_title,
               d.outcome, d.author,
               1 - (pe.embedding <=> $1) AS score
        FROM paragraph_embeddings pe
        JOIN decision_paragraphs dp ON dp.id = pe.paragraph_id
        JOIN decision_blocks db ON db.id = dp.block_id
        JOIN decisions d ON d.id = db.decision_id
        JOIN cases c ON c.id = d.case_id
        {where_clause}
        ORDER BY pe.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return [dict(record) for record in records]


async def search_similar_case_law(
    query_embedding: list[float],
    limit: int = 5,
) -> list[dict]:
    """Search case law by semantic similarity over ``case_law_embeddings``.

    Returns:
        Up to ``limit`` dicts (case metadata, the matching ``chunk_text`` and
        ``score`` = ``1 - distance``), closest first. ``subject_tags`` is decoded
        from JSON when the driver hands it back as a string.
    """
    pool = await get_pool()
    sql = """
        SELECT cl.case_number, cl.case_name, cl.court, cl.summary,
               cl.key_quote, cl.subject_tags,
               cle.chunk_text,
               1 - (cle.embedding <=> $1) AS score
        FROM case_law_embeddings cle
        JOIN case_law cl ON cl.id = cle.case_law_id
        ORDER BY cle.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, query_embedding, limit)

    hits: list[dict] = []
    for record in records:
        hit = dict(record)
        raw_tags = hit.get("subject_tags")
        if isinstance(raw_tags, str):
            hit["subject_tags"] = json.loads(raw_tags)
        hits.append(hit)
    return hits


async def search_precedents(
    query_embedding: list[float],
    limit: int = 10,
) -> list[dict]:
    """Combined search: decision paragraphs + case law, ranked by score.

    Each source is queried for up to ``limit`` hits; the merged list is sorted
    by descending ``score`` and truncated to ``limit``. Paragraph content is
    cut to its first 500 characters; case-law entries carry the summary as
    ``content``.
    """
    paragraph_hits = await search_similar_paragraphs(query_embedding, limit=limit)
    case_law_hits = await search_similar_case_law(query_embedding, limit=limit)

    merged = [
        {
            "type": "decision_paragraph",
            "score": float(hit["score"]),
            "case_number": hit["case_number"],
            "case_title": hit["case_title"],
            "block_type": hit["block_type"],
            "content": hit["content"][:500],
            "author": hit["author"],
        }
        for hit in paragraph_hits
    ]
    merged.extend(
        {
            "type": "case_law",
            "score": float(hit["score"]),
            "case_number": hit["case_number"],
            "case_name": hit["case_name"],
            "court": hit["court"],
            "content": hit["summary"],
        }
        for hit in case_law_hits
    )

    # Stable sort: ties keep paragraphs ahead of case law, as merged above.
    ranked = sorted(merged, key=lambda entry: entry["score"], reverse=True)
    return ranked[:limit]


# ── Case precedents (CRUD) ────────────────────────────────────────


async def create_case_precedent(
    case_id: UUID,
    quote: str,
    citation: str,
    section_id: str | None = None,
    chair_note: str = "",
    pdf_document_id: UUID | None = None,
    practice_area: str | None = None,
) -> dict:
    """Insert a new precedent attached to a case.

    Returns:
        The full inserted ``case_precedents`` row (``RETURNING *``) as a dict.
    """
    insert_sql = """
        INSERT INTO case_precedents
            (case_id, section_id, quote, citation, chair_note, pdf_document_id, practice_area)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
        RETURNING *
        """
    # Note the column order differs from the signature: section_id precedes quote.
    values = (
        case_id, section_id, quote, citation,
        chair_note, pdf_document_id, practice_area,
    )
    pool = await get_pool()
    inserted = await pool.fetchrow(insert_sql, *values)
    return dict(inserted)


async def list_case_precedents(case_id: UUID) -> list[dict]:
    """List every precedent attached to a case.

    Rows are ordered by ``section_id`` (NULLs last), then by ``created_at``.
    """
    select_sql = """
        SELECT id, case_id, section_id, quote, citation, chair_note,
               pdf_document_id, practice_area, created_at, updated_at
        FROM case_precedents
        WHERE case_id = $1
        ORDER BY section_id NULLS LAST, created_at
        """
    pool = await get_pool()
    records = await pool.fetch(select_sql, case_id)
    return [dict(record) for record in records]


async def delete_case_precedent(precedent_id: UUID) -> bool:
    """Delete a precedent attachment by id.

    Returns:
        True only when the statement's status string is exactly ``"DELETE 1"``.
    """
    delete_sql = "DELETE FROM case_precedents WHERE id = $1"
    pool = await get_pool()
    status_tag = await pool.execute(delete_sql, precedent_id)
    return status_tag == "DELETE 1"


async def search_precedent_library(
    query: str, practice_area: str = "", limit: int = 10,
) -> list[dict]:
    """Search all precedents across cases by citation or quote text.

    The match is a case-insensitive *literal* substring match (ILIKE) against
    ``citation`` and ``quote``: ``%``, ``_`` and ``\\`` in ``query`` are escaped
    so they match themselves instead of acting as wildcards.

    Args:
        query: Text to look for. An empty string matches every row.
        practice_area: When non-empty, only rows with exactly this
            ``practice_area`` are returned.
        limit: Maximum number of rows.

    Returns:
        Matching ``case_precedents`` rows as dicts, newest first.
    """
    pool = await get_pool()
    # Backslash is PostgreSQL's default LIKE/ILIKE escape character; escape it
    # first so the escapes added for % and _ are not themselves doubled.
    escaped = (
        query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    pattern = f"%{escaped}%"
    if practice_area:
        rows = await pool.fetch(
            """
            SELECT id, case_id, section_id, quote, citation, chair_note,
                   practice_area, created_at
            FROM case_precedents
            WHERE (citation ILIKE $1 OR quote ILIKE $1)
              AND practice_area = $2
            ORDER BY created_at DESC
            LIMIT $3
            """,
            pattern, practice_area, limit,
        )
    else:
        rows = await pool.fetch(
            """
            SELECT id, case_id, section_id, quote, citation, chair_note,
                   practice_area, created_at
            FROM case_precedents
            WHERE citation ILIKE $1 OR quote ILIKE $1
            ORDER BY created_at DESC
            LIMIT $2
            """,
            pattern, limit,
        )
    return [dict(r) for r in rows]


# ── Chair feedback ────────────────────────────────────────────────

async def record_chair_feedback(
    case_id: UUID | None,
    block_id: str,
    feedback_text: str,
    category: str = "other",
    lesson_extracted: str = "",
) -> UUID:
    """Record feedback from the chair (Dafna) on a draft block.

    The row id is generated client-side with ``uuid4()``.

    Returns:
        The id of the new ``chair_feedback`` row.
    """
    pool = await get_pool()
    new_id = uuid4()
    insert_sql = """INSERT INTO chair_feedback
               (id, case_id, block_id, feedback_text, category, lesson_extracted)
               VALUES ($1, $2, $3, $4, $5, $6)"""
    values = (new_id, case_id, block_id, feedback_text, category, lesson_extracted)
    async with pool.acquire() as conn:
        await conn.execute(insert_sql, *values)
    return new_id


async def list_chair_feedback(
    case_id: UUID | None = None,
    category: str | None = None,
    unresolved_only: bool = False,
    limit: int = 100,
) -> list[dict]:
    """List chair feedback, newest first, optionally filtered (INV-TOOL5 / GAP-53).

    Args:
        case_id: When truthy, restrict to this case.
        category: When truthy, restrict to this category.
        unresolved_only: When true, restrict to rows with ``resolved = FALSE``.
        limit: Row cap; coerced with ``int()`` and raised to at least 1.
    """
    pool = await get_pool()
    args: list = []
    clauses: list[str] = []
    for column, value in (("case_id", case_id), ("category", category)):
        if value:
            args.append(value)
            clauses.append(f"{column} = ${len(args)}")
    if unresolved_only:
        clauses.append("resolved = FALSE")

    where_clause = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    args.append(max(1, int(limit)))  # LIMIT always takes the final placeholder
    sql = f"SELECT * FROM chair_feedback {where_clause} ORDER BY created_at DESC LIMIT ${len(args)}"
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return [dict(record) for record in records]


async def get_chair_feedback(feedback_id: UUID) -> dict | None:
    """Return one ``chair_feedback`` row by id, or None when it does not exist.

    The row is enriched with ``case_number`` and ``case_appeal_type`` from a
    LEFT JOIN on ``cases`` (both NULL when the feedback has no matching case).
    """
    select_sql = """SELECT cf.*, c.case_number, c.appeal_type AS case_appeal_type
               FROM chair_feedback cf
               LEFT JOIN cases c ON c.id = cf.case_id
               WHERE cf.id = $1"""
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(select_sql, feedback_id)
    if not record:
        return None
    return dict(record)


async def resolve_chair_feedback(
    feedback_id: UUID,
    applied_to: list[str],
) -> None:
    """Mark a feedback row as resolved and store where it was applied.

    Sets ``resolved = TRUE`` and overwrites ``applied_to`` with the given list.
    """
    update_sql = """UPDATE chair_feedback
               SET resolved = TRUE, applied_to = $2
               WHERE id = $1"""
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(update_sql, feedback_id, applied_to)


# ── Appraiser facts (V5 — for interim drafts) ─────────────────────

async def replace_appraiser_facts(
    case_id: UUID,
    document_id: UUID,
    facts: list[dict],
) -> int:
    """Replace all ``appraiser_facts`` rows belonging to one document.

    Inside a single transaction: delete every row for ``document_id``, then
    insert one row per entry of ``facts``.

    Each fact dict carries: appraiser_name, fact_type ('plan'|'permit') and
    identifier (required keys); appraiser_side (default ""), details (dict,
    default {}) and page_number (default None) are optional.

    Returns:
        ``len(facts)``.
    """
    delete_sql = "DELETE FROM appraiser_facts WHERE document_id = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(delete_sql, document_id)
            for fact in facts:
                await conn.execute(
                    """INSERT INTO appraiser_facts
                       (case_id, document_id, appraiser_name, appraiser_side,
                        fact_type, identifier, details, page_number)
                       VALUES ($1, $2, $3, $4, $5, $6, $7, $8)""",
                    case_id,
                    document_id,
                    fact["appraiser_name"],
                    fact.get("appraiser_side", ""),
                    fact["fact_type"],
                    fact["identifier"],
                    # details is sent as JSON text; Hebrew stays unescaped.
                    json.dumps(fact.get("details", {}), ensure_ascii=False),
                    fact.get("page_number"),
                )
    return len(facts)


async def list_appraiser_facts(
    case_id: UUID,
    fact_type: str | None = None,
) -> list[dict]:
    """List ``appraiser_facts`` for a case, optionally restricted to one fact_type.

    Returns:
        Row dicts with ``id``, ``case_id`` and ``document_id`` converted to
        ``str`` and ``details`` decoded from JSON when it arrives as a string.
        Ordered by identifier and appraiser_name (preceded by fact_type when no
        filter is given).
    """
    if fact_type:
        sql = """SELECT * FROM appraiser_facts
                   WHERE case_id = $1 AND fact_type = $2
                   ORDER BY identifier, appraiser_name"""
        args: tuple = (case_id, fact_type)
    else:
        sql = """SELECT * FROM appraiser_facts
                   WHERE case_id = $1
                   ORDER BY fact_type, identifier, appraiser_name"""
        args = (case_id,)
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)

    facts: list[dict] = []
    for record in records:
        fact = dict(record)
        for key in ("id", "case_id", "document_id"):
            fact[key] = str(fact[key])
        raw_details = fact.get("details")
        if isinstance(raw_details, str):
            fact["details"] = json.loads(raw_details)
        facts.append(fact)
    return facts


async def detect_appraiser_conflicts(case_id: UUID) -> list[dict]:
    """Find identifiers reported by two or more different appraisers in a case.

    Facts are grouped by (identifier, fact_type); a group is returned when it
    holds more than one distinct ``appraiser_name``. The query itself does not
    compare ``details`` — it surfaces the candidate groups (e.g. "תמ"א 38"
    reported by both sides) so the caller can inspect how the reports differ.

    Returns:
        One dict per group: ``identifier``, ``fact_type``, ``n_appraisers`` and
        ``entries``. Entries are ordered committee → appellant → deciding →
        anything else, then by appraiser name, and each carries the appraiser's
        side so the caller can label it.
    """
    grouping_sql = """SELECT identifier, fact_type,
                      json_agg(jsonb_build_object(
                          'appraiser_name', appraiser_name,
                          'appraiser_side', appraiser_side,
                          'details', details,
                          'page_number', page_number,
                          'document_id', document_id
                      ) ORDER BY
                          CASE appraiser_side
                              WHEN 'committee' THEN 1
                              WHEN 'appellant' THEN 2
                              WHEN 'deciding'  THEN 3
                              ELSE 4
                          END,
                          appraiser_name
                      ) AS entries,
                      COUNT(DISTINCT appraiser_name) AS n_appraisers
               FROM appraiser_facts
               WHERE case_id = $1
               GROUP BY identifier, fact_type
               HAVING COUNT(DISTINCT appraiser_name) > 1"""
    pool = await get_pool()
    async with pool.acquire() as conn:
        groups = await conn.fetch(grouping_sql, case_id)

    conflicts: list[dict] = []
    for group in groups:
        entries = group["entries"]
        # The aggregate may come back as JSON text; decode it when it does.
        if isinstance(entries, str):
            entries = json.loads(entries)
        # A nested details value may itself still be a JSON string.
        for entry in entries:
            nested = entry.get("details")
            if isinstance(nested, str):
                entry["details"] = json.loads(nested)
        conflicts.append(
            {
                "identifier": group["identifier"],
                "fact_type": group["fact_type"],
                "n_appraisers": group["n_appraisers"],
                "entries": entries,
            }
        )
    return conflicts


# ── V7: External precedent library + halachot ─────────────────────


def _row_to_case_law(row: asyncpg.Record) -> dict:
    """Normalize a case_law row into a plain dict.

    Works on a copy of ``row``. A string ``subject_tags`` is decoded from JSON
    (falling back to ``[]`` when it cannot be decoded), and a non-null ``date``
    is replaced by its ``isoformat()`` string. All other fields pass through.
    """
    normalized = dict(row)

    raw_tags = normalized.get("subject_tags")
    if isinstance(raw_tags, str):
        try:
            decoded_tags = json.loads(raw_tags)
        except (TypeError, ValueError):
            decoded_tags = []
        normalized["subject_tags"] = decoded_tags

    decided_on = normalized.get("date")
    if decided_on is not None:
        normalized["date"] = decided_on.isoformat()
    return normalized


async def get_case_law(case_law_id: UUID) -> dict | None:
    """Fetch one ``case_law`` row by id, normalized via ``_row_to_case_law``.

    Returns None when no row has that id.
    """
    pool = await get_pool()
    record = await pool.fetchrow("SELECT * FROM case_law WHERE id = $1", case_law_id)
    if not record:
        return None
    return _row_to_case_law(record)


async def get_external_case_law_by_citation(citation: str) -> dict | None:
    """Look up an uploaded external precedent by its exact ``case_number``.

    Only rows with ``source_kind = 'external_upload'`` are considered and at
    most one is returned (``LIMIT 1``, no explicit ordering). The result holds
    a subset of columns, normalized via ``_row_to_case_law``; None when nothing
    matches.
    """
    lookup_sql = """
        SELECT id, case_number, case_name, court, date,
               halacha_extraction_status, source_kind, created_at
        FROM case_law
        WHERE case_number = $1
          AND source_kind = 'external_upload'
        LIMIT 1
        """
    pool = await get_pool()
    record = await pool.fetchrow(lookup_sql, citation)
    if not record:
        return None
    return _row_to_case_law(record)


async def mark_indexed(case_law_id: UUID) -> None:
    """Record that a case_law row's embeddings match its current content (FU-3).

    Copies ``content_hash`` into ``indexed_hash`` for the given row. Call this
    AFTER a successful chunk+embed+store.
    """
    update_sql = "UPDATE case_law SET indexed_hash = content_hash WHERE id = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(update_sql, case_law_id)


async def list_stale_case_law(limit: int = 500) -> list[dict]:
    """List case_law rows whose embeddings are stale vs. current content (GAP-09/INV-G6).

    A row is stale when it has non-empty ``full_text`` and its ``content_hash``
    differs from ``indexed_hash`` (NULL-safe comparison). Oldest rows first,
    capped at ``limit``; each dict holds id, case_number and source_kind.
    """
    stale_sql = """SELECT id, case_number, source_kind
               FROM case_law
               WHERE coalesce(full_text, '') <> ''
                 AND content_hash IS DISTINCT FROM indexed_hash
               ORDER BY created_at LIMIT $1"""
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(stale_sql, limit)
    return [dict(record) for record in records]


async def recompute_content_hashes() -> dict:
    """Backfill (FU-3): recompute ``content_hash`` for every ``case_law`` row.

    For each row, ``content_hash`` is set to ``_content_hash(full_text or "")``.
    ``indexed_hash`` is set to the same value only when the row already has at
    least one ``precedent_chunks`` row (i.e. it is already embedded); otherwise
    ``indexed_hash`` is left unchanged, so a row that was never indexed keeps
    surfacing as stale. Hash-only; nothing is re-embedded.

    Chunk existence is read in the same query that lists the rows, so the
    backfill costs one UPDATE per row instead of a lookup plus an UPDATE.

    Returns:
        ``{"updated": <number of rows processed>}``.
    """
    pool = await get_pool()
    updated = 0
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT cl.id, cl.full_text,
                      EXISTS(SELECT 1 FROM precedent_chunks pc
                             WHERE pc.case_law_id = cl.id) AS has_chunks
               FROM case_law cl"""
        )
        for r in rows:
            await conn.execute(
                "UPDATE case_law SET content_hash = $2, "
                "indexed_hash = CASE WHEN $3 THEN $2 ELSE indexed_hash END WHERE id = $1",
                r["id"], _content_hash(r["full_text"] or ""), bool(r["has_chunks"]))
            updated += 1
    return {"updated": updated}


async def add_case_law_relation(
    a_id: UUID, b_id: UUID, relation_type: str = "same_case_chain"
) -> None:
    """Link two case_law records in both directions.

    Inserts (a → b) and (b → a) with the same ``relation_type``. Idempotent: an
    already-existing (case_law_id, related_id) pair is skipped via
    ``ON CONFLICT ... DO NOTHING``.
    """
    insert_sql = """
            INSERT INTO case_law_relations(case_law_id, related_id, relation_type)
            VALUES($1, $2, $3)
            ON CONFLICT (case_law_id, related_id) DO NOTHING
            """
    directed_pairs = ((a_id, b_id), (b_id, a_id))
    link_rows = [(source, target, relation_type) for source, target in directed_pairs]
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.executemany(insert_sql, link_rows)


async def remove_case_law_relation(a_id: UUID, b_id: UUID) -> None:
    """Remove the link between two case_law records in both directions.

    Deletes the (a → b) and (b → a) rows of ``case_law_relations`` in a single
    statement, whatever their relation_type.
    """
    delete_sql = """
        DELETE FROM case_law_relations
        WHERE (case_law_id = $1 AND related_id = $2)
           OR (case_law_id = $2 AND related_id = $1)
        """
    pool = await get_pool()
    await pool.execute(delete_sql, a_id, b_id)


async def get_case_law_relations(case_law_id: UUID) -> list[dict]:
    """Return every case_law record linked from ``case_law_id``.

    Each result is the related row normalized via ``_row_to_case_law`` with the
    link's ``relation_type`` added. Ordered by decision date ascending, rows
    without a date last.
    """
    select_sql = """
        SELECT cl.*, r.relation_type
        FROM case_law_relations r
        JOIN case_law cl ON cl.id = r.related_id
        WHERE r.case_law_id = $1
        ORDER BY cl.date ASC NULLS LAST
        """
    pool = await get_pool()
    records = await pool.fetch(select_sql, case_law_id)

    related: list[dict] = []
    for record in records:
        fields = dict(record)
        # Take relation_type out before normalizing, then re-attach it.
        relation = fields.pop("relation_type")
        related.append({**_row_to_case_law(fields), "relation_type": relation})
    return related


async def get_case_law_by_citation(case_number: str) -> dict | None:
    """Fetch a ``case_law`` row by exact ``case_number``, normalized; None if absent.

    NOTE(review): there is no source_kind filter or ORDER BY, so when several
    rows share a case_number whichever one the database returns first wins —
    confirm callers are fine with that.
    """
    pool = await get_pool()
    record = await pool.fetchrow(
        "SELECT * FROM case_law WHERE case_number = $1", case_number,
    )
    if not record:
        return None
    return _row_to_case_law(record)


async def create_external_case_law(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    headnote: str = "",
    key_quote: str = "",
    source_url: str = "",
    source_type: str = "",
    precedent_level: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
) -> dict:
    """Upsert a chair-uploaded external precedent into case_law.

    A single ``INSERT ... ON CONFLICT`` keyed on ``case_number`` among rows
    whose source_kind is not 'internal_committee'. If such a row already exists
    (for example an auto-discovered source_kind='cited_only' row) it is
    promoted to source_kind='external_upload' and updated in place.

    On conflict:
      * overwritten unconditionally: case_name, practice_area, appeal_subtype,
        subject_tags, headnote, full_text, source_type, precedent_level,
        is_binding, content_hash;
      * kept from the existing row when the new value is empty/NULL: court,
        date, summary, key_quote, source_url, document_id;
      * always reset: extraction_status='processing',
        halacha_extraction_status='pending'.

    ``subject_tags`` is stored as a JSON array (``None`` becomes ``[]``) and
    ``content_hash`` is ``_content_hash(full_text)``.

    Returns:
        The inserted/updated row, normalized via ``_row_to_case_law``.
    """
    pool = await get_pool()
    # ensure_ascii=False keeps non-ASCII (Hebrew) tags readable in the stored JSON.
    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
    async with pool.acquire() as conn:
        # Atomic upsert on the V15 partial unique index
        # uq_case_law_external_number (case_number) WHERE source_kind <> 'internal_committee'.
        # The predicate is repeated in ON CONFLICT (required for partial indexes).
        # This also subsumes the old cited_only→external_upload promotion: a
        # cited_only row with the same case_number conflicts and is promoted by
        # DO UPDATE. Scoped to the external partial index, so an internal row with
        # the same number is NOT touched (the old SELECT-without-source_kind could
        # wrongly promote it).
        row = await conn.fetchrow(
            """
            INSERT INTO case_law (
                case_number, case_name, court, date, subject_tags,
                summary, key_quote, full_text, source_url,
                source_kind, document_id, extraction_status,
                halacha_extraction_status, practice_area, appeal_subtype,
                headnote, source_type, precedent_level, is_binding, content_hash
            ) VALUES (
                $1, $2, $3, $4, $5, $6, $7, $8, $9,
                'external_upload', $10, 'processing', 'pending',
                $11, $12, $13, $14, $15, $16, $17
            )
            ON CONFLICT (case_number) WHERE source_kind <> 'internal_committee'
            DO UPDATE SET
                case_name = EXCLUDED.case_name,
                court = COALESCE(NULLIF(EXCLUDED.court, ''), case_law.court),
                date = COALESCE(EXCLUDED.date, case_law.date),
                practice_area = EXCLUDED.practice_area,
                appeal_subtype = EXCLUDED.appeal_subtype,
                subject_tags = EXCLUDED.subject_tags,
                summary = COALESCE(NULLIF(EXCLUDED.summary, ''), case_law.summary),
                headnote = EXCLUDED.headnote,
                key_quote = COALESCE(NULLIF(EXCLUDED.key_quote, ''), case_law.key_quote),
                full_text = EXCLUDED.full_text,
                source_url = COALESCE(NULLIF(EXCLUDED.source_url, ''), case_law.source_url),
                source_type = EXCLUDED.source_type,
                precedent_level = EXCLUDED.precedent_level,
                is_binding = EXCLUDED.is_binding,
                document_id = COALESCE(EXCLUDED.document_id, case_law.document_id),
                source_kind = 'external_upload',
                extraction_status = 'processing',
                halacha_extraction_status = 'pending',
                content_hash = EXCLUDED.content_hash
            RETURNING *
            """,
            # Positional arguments below follow $1..$17, NOT the column list order
            # (source_kind and the two status columns are literals in VALUES).
            case_number, case_name, court, decision_date, tags_json,
            summary, key_quote, full_text, source_url,
            document_id, practice_area, appeal_subtype, headnote,
            source_type, precedent_level, is_binding,
            _content_hash(full_text),
        )
    return _row_to_case_law(row)


async def create_internal_committee_decision(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    chair_name: str = "",
    district: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
    proceeding_type: str = "ערר",
) -> dict:
    """Upsert an appeals-committee decision as source_kind='internal_committee'.

    Idempotency key: (case_number, proceeding_type) — the same number can
    exist as both 'ערר' and 'בל"מ' (an extension-of-time request can be
    filed against an existing appeal with the same number).

    ``case_number`` is passed through ``_canonical_case_number`` before it is
    used as the key. ``source_type`` is always 'appeals_committee'.

    On conflict:
      * overwritten unconditionally: case_name, practice_area, appeal_subtype,
        subject_tags, full_text, is_binding, content_hash;
      * kept from the existing row when the new value is empty/NULL: court,
        date, chair_name, district, summary, document_id;
      * always reset: extraction_status='processing',
        halacha_extraction_status='pending'.

    Returns:
        The inserted/updated row, normalized via ``_row_to_case_law``.
    """
    pool = await get_pool()
    case_number = _canonical_case_number(case_number)
    # ensure_ascii=False keeps non-ASCII (Hebrew) tags readable in the stored JSON.
    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
    async with pool.acquire() as conn:
        # Atomic upsert on V15 partial unique index
        # uq_case_law_internal_number_proc (case_number, proceeding_type)
        # WHERE source_kind = 'internal_committee'. Predicate repeated for the
        # partial index. Replaces the old SELECT-then-INSERT/UPDATE (race-prone).
        row = await conn.fetchrow(
            """
            INSERT INTO case_law (
                case_number, case_name, court, date, chair_name, district,
                subject_tags, summary, full_text,
                source_kind, source_type, document_id,
                extraction_status, halacha_extraction_status,
                practice_area, appeal_subtype, is_binding, proceeding_type, content_hash
            ) VALUES (
                $1, $2, $3, $4, $5, $6,
                $7, $8, $9,
                'internal_committee', 'appeals_committee', $10,
                'processing', 'pending',
                $11, $12, $13, $14, $15
            )
            ON CONFLICT (case_number, proceeding_type)
                WHERE source_kind = 'internal_committee'
            DO UPDATE SET
                case_name = EXCLUDED.case_name,
                court = COALESCE(NULLIF(EXCLUDED.court, ''), case_law.court),
                date = COALESCE(EXCLUDED.date, case_law.date),
                chair_name = COALESCE(NULLIF(EXCLUDED.chair_name, ''), case_law.chair_name),
                district = COALESCE(NULLIF(EXCLUDED.district, ''), case_law.district),
                practice_area = EXCLUDED.practice_area,
                appeal_subtype = EXCLUDED.appeal_subtype,
                subject_tags = EXCLUDED.subject_tags,
                summary = COALESCE(NULLIF(EXCLUDED.summary, ''), case_law.summary),
                full_text = EXCLUDED.full_text,
                source_type = 'appeals_committee',
                source_kind = 'internal_committee',
                is_binding = EXCLUDED.is_binding,
                document_id = COALESCE(EXCLUDED.document_id, case_law.document_id),
                extraction_status = 'processing',
                halacha_extraction_status = 'pending',
                content_hash = EXCLUDED.content_hash
            RETURNING *
            """,
            # Positional arguments below follow $1..$15; the literal columns
            # (source_kind, source_type, both status columns) take no argument.
            case_number, case_name, court, decision_date, chair_name, district,
            tags_json, summary, full_text,
            document_id, practice_area, appeal_subtype, is_binding,
            proceeding_type, _content_hash(full_text),
        )
    return _row_to_case_law(row)


def _compute_searchable(row: dict, has_embedded_chunk: bool) -> bool:
    """Evaluate the completeness contract (INV-DM1 / 02-data-model §2a).

    A row counts as searchable only when every one of these holds:

    * at least one chunk with a non-null embedding exists (passed in by the
      caller as ``has_embedded_chunk`` — this function never touches the DB);
    * ``extraction_status`` equals ``"completed"``;
    * ``case_number``, ``case_name`` and ``source_kind`` are non-blank;
    * ``practice_area`` is non-blank, unless ``source_kind`` is
      ``"external_upload"``;
    * at least one of ``headnote`` / ``summary`` is non-blank, or
      ``subject_tags`` is non-empty.
    """

    def _filled(key: str) -> bool:
        # Missing keys, None and whitespace-only strings all count as blank.
        return bool((row.get(key) or "").strip())

    if not has_embedded_chunk:
        return False
    if (row.get("extraction_status") or "") != "completed":
        return False
    if not _filled("case_number") or not _filled("case_name"):
        return False

    source_kind = row.get("source_kind") or ""
    # practice_area is mandatory only for identifier-keyed corpora (internal
    # committee decisions, active cases). External precedents (e.g. בג"ץ) are
    # legitimately cross-domain and may carry no single practice_area.
    if source_kind != "external_upload" and not _filled("practice_area"):
        return False
    if not source_kind.strip():
        return False

    if _filled("headnote") or _filled("summary"):
        return True
    return len(row.get("subject_tags") or []) > 0


async def recompute_searchable(case_law_id: "UUID | str | None" = None) -> int:
    """Recompute and persist the `searchable` flag. Idempotent / reversible.

    Args:
        case_law_id: Row to recompute, as a ``UUID`` or its string form.
            ``None`` recomputes ALL rows (used by the V21 backfill and the
            dry-run).

    Returns:
        The number of processed rows that are now marked ``searchable = true``.

    Raises:
        ValueError: ``case_law_id`` is a string that is not a valid UUID.
    """
    # Fetch only the columns `_compute_searchable` reads — `SELECT *` would
    # pull every row's full_text during a whole-table backfill — and fold the
    # cross-table "has an embedded chunk" check into the same query instead of
    # issuing one extra round-trip per row.
    select_sql = (
        "SELECT c.id, c.case_number, c.case_name, c.practice_area, "
        "c.source_kind, c.extraction_status, c.headnote, c.summary, "
        "c.subject_tags, "
        "EXISTS(SELECT 1 FROM precedent_chunks pc "
        "WHERE pc.case_law_id = c.id AND pc.embedding IS NOT NULL) "
        "AS has_embedded_chunk "
        "FROM case_law c"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        if case_law_id is not None:
            cid = case_law_id if isinstance(case_law_id, UUID) else UUID(str(case_law_id))
            rows = await conn.fetch(select_sql + " WHERE c.id = $1", cid)
        else:
            rows = await conn.fetch(select_sql)
        n_true = 0
        for r in rows:
            row = dict(r)
            # subject_tags may arrive as a JSON string; decode it so the
            # emptiness check sees a real collection. Undecodable → no tags.
            tags = row.get("subject_tags")
            if isinstance(tags, str):
                try:
                    tags = json.loads(tags)
                except (ValueError, TypeError):
                    tags = []
            row["subject_tags"] = tags or []
            val = _compute_searchable(row, bool(row["has_embedded_chunk"]))
            await conn.execute(
                "UPDATE case_law SET searchable = $2 WHERE id = $1", row["id"], val)
            if val:
                n_true += 1
    return n_true


async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
    """Patch metadata columns on one case_law row.

    Keyword arguments outside the whitelist are silently dropped. Whitelisted
    columns: case_number, case_name, court, date, practice_area,
    appeal_subtype, subject_tags, summary, headnote, key_quote, source_url,
    source_type, precedent_level, is_binding, district, chair_name,
    proceeding_type, citation_formatted.

    Returns the updated row converted by ``_row_to_case_law``, or None when
    the UPDATE matched no row. When no whitelisted field is supplied nothing
    is written and the result of ``get_case_law`` is returned instead.
    """
    permitted = {
        "case_number", "case_name", "court", "date", "practice_area", "appeal_subtype",
        "subject_tags", "summary", "headnote", "key_quote", "source_url",
        "source_type", "precedent_level", "is_binding", "district", "chair_name",
        "proceeding_type", "citation_formatted",
    }
    changes = {name: value for name, value in fields.items() if name in permitted}
    if not changes:
        return await get_case_law(case_law_id)

    pool = await get_pool()
    args: list = [case_law_id]
    assignments: list[str] = []
    for column, value in changes.items():
        if column == "subject_tags":
            # Tags are written as a JSON string; non-ASCII text stays unescaped.
            value = json.dumps(value or [], ensure_ascii=False)
        args.append(value)
        # $1 is the row id, so the first patched column binds to $2.
        # Column names come from the whitelist only, never from raw input.
        assignments.append(f"{column} = ${len(args)}")

    statement = f"UPDATE case_law SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
    updated = await pool.fetchrow(statement, *args)
    if updated:
        return _row_to_case_law(updated)
    return None


async def set_case_law_extraction_status(case_law_id: UUID, status: str) -> None:
    """Write ``extraction_status`` for one case_law row.

    For the terminal states ('completed' / 'failed') the same statement also
    NULLs ``metadata_extraction_requested_at``. The local-MCP queue
    (`process_pending_extractions`) scans ``WHERE *_requested_at IS NOT NULL``,
    so a flag left set would get the row re-picked forever and keep it blocked
    in the UI's `isPrecedentActive` check.
    """
    pool = await get_pool()
    is_terminal = status in ("completed", "failed")
    sql = (
        "UPDATE case_law SET extraction_status = $2, "
        "metadata_extraction_requested_at = NULL WHERE id = $1"
        if is_terminal else
        "UPDATE case_law SET extraction_status = $2 WHERE id = $1"
    )
    await pool.execute(sql, case_law_id, status)


async def set_case_law_halacha_status(case_law_id: UUID, status: str) -> None:
    """Write ``halacha_extraction_status`` for one case_law row.

    Same shape as ``set_case_law_extraction_status``: for the terminal states
    ('completed' / 'failed') the statement also NULLs
    ``halacha_extraction_requested_at`` so neither the queue nor the UI sees a
    stale request flag.
    """
    pool = await get_pool()
    is_terminal = status in ("completed", "failed")
    sql = (
        "UPDATE case_law SET halacha_extraction_status = $2, "
        "halacha_extraction_requested_at = NULL WHERE id = $1"
        if is_terminal else
        "UPDATE case_law SET halacha_extraction_status = $2 WHERE id = $1"
    )
    await pool.execute(sql, case_law_id, status)


async def set_case_law_metadata_status(case_law_id: UUID, status: str) -> None:
    """Write ``metadata_extraction_status`` for one case_law row.

    Same shape as ``set_case_law_halacha_status``: for the terminal states
    ('completed' / 'failed') the statement also NULLs
    ``metadata_extraction_requested_at``. The local-MCP queue
    (`process_pending_extractions`) scans ``WHERE *_requested_at IS NOT NULL``,
    so clearing the flag stops it re-picking the row and lets the UI's
    ``isPrecedentActive`` check settle.
    """
    pool = await get_pool()
    is_terminal = status in ("completed", "failed")
    sql = (
        "UPDATE case_law SET metadata_extraction_status = $2, "
        "metadata_extraction_requested_at = NULL WHERE id = $1"
        if is_terminal else
        "UPDATE case_law SET metadata_extraction_status = $2 WHERE id = $1"
    )
    await pool.execute(sql, case_law_id, status)


async def list_external_case_law(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
    offset: int = 0,
    source_kind: str = "external_upload",
) -> list[dict]:
    """List chair-uploaded precedents, with simple filters.

    Every filter is optional (an empty string disables it) and all active
    filters are AND-ed together.

    Args:
        practice_area: Exact match on ``practice_area``.
        court: Case-insensitive substring match on ``court``.
        precedent_level: Exact match on ``precedent_level``.
        source_type: Exact match on ``source_type``.
        search: Case-insensitive substring matched against case_number,
            case_name, summary and headnote.
        limit: Maximum number of rows returned.
        offset: Number of rows skipped (newest first).
        source_kind: Exact match on ``source_kind``. The special value
            ``"all_committees"`` expands to: source_kind='internal_committee'
            OR (source_kind='external_upload' AND
            source_type='appeals_committee').

    Returns:
        Rows converted by ``_row_to_case_law``, ordered by ``created_at``
        descending, each with ``halachot_count`` / ``approved_count`` and with
        the two ``*_requested_at`` timestamps rendered as ISO strings.
    """
    pool = await get_pool()
    params: list = []

    def bind(value) -> str:
        """Register a query argument and return its ``$n`` placeholder."""
        params.append(value)
        return f"${len(params)}"

    if source_kind == "all_committees":
        conditions = [
            "(source_kind = 'internal_committee' OR "
            "(source_kind = 'external_upload' AND source_type = 'appeals_committee'))"
        ]
    else:
        # Bound as a parameter: splicing the value into the SQL text would let
        # a caller-supplied source_kind inject arbitrary SQL.
        conditions = [f"source_kind = {bind(source_kind)}"]
    if practice_area:
        conditions.append(f"practice_area = {bind(practice_area)}")
    if court:
        conditions.append(f"court ILIKE {bind(f'%{court}%')}")
    if precedent_level:
        conditions.append(f"precedent_level = {bind(precedent_level)}")
    if source_type:
        conditions.append(f"source_type = {bind(source_type)}")
    if search:
        # One bound pattern, referenced by all four columns.
        pattern = bind(f"%{search}%")
        conditions.append(
            f"(case_number ILIKE {pattern} OR case_name ILIKE {pattern} "
            f"OR summary ILIKE {pattern} OR headnote ILIKE {pattern})"
        )
    where_sql = " AND ".join(conditions)
    limit_ph = bind(limit)
    offset_ph = bind(offset)
    sql = f"""
        SELECT id, case_number, case_name, court, date, practice_area,
               appeal_subtype, source_type, precedent_level, is_binding,
               summary, headnote, subject_tags, source_kind,
               chair_name, district, citation_formatted,
               extraction_status, halacha_extraction_status,
               metadata_extraction_status,
               metadata_extraction_requested_at,
               halacha_extraction_requested_at,
               created_at,
               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id) AS halachot_count,
               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id
                  AND h.review_status IN ('approved', 'published')) AS approved_count
        FROM case_law
        WHERE {where_sql}
        ORDER BY created_at DESC
        LIMIT {limit_ph} OFFSET {offset_ph}
    """
    rows = await pool.fetch(sql, *params)
    out = []
    for r in rows:
        d = _row_to_case_law(r)
        # Render timestamps as ISO strings so the JSON layer stays simple
        for k in ("metadata_extraction_requested_at", "halacha_extraction_requested_at"):
            if d.get(k) is not None:
                d[k] = d[k].isoformat()
        out.append(d)
    return out


async def delete_case_law(case_law_id: UUID) -> bool:
    """Delete one precedent row; return True iff exactly one row was removed.

    Dependent chunks and halachot are expected to go with it through the
    schema's cascade rules (not visible in this function).
    """
    pool = await get_pool()
    status_tag = await pool.execute(
        "DELETE FROM case_law WHERE id = $1", case_law_id,
    )
    # The command tag reads "DELETE <n>"; anything but 1 means no row matched.
    deleted_exactly_one = status_tag == "DELETE 1"
    return deleted_exactly_one


async def store_precedent_chunks(
    case_law_id: UUID, chunks: list[dict],
) -> int:
    """Atomically replace the precedent chunks of a case_law row (single-tier).

    Existing chunks of ``case_law_id`` are deleted and ``chunks`` inserted in
    their place inside one transaction, so a failure mid-way rolls back to the
    previous chunk set instead of leaving the precedent with none or only some.

    Each chunk dict has: chunk_index, content, section_type (defaults to
    ``"other"``), page_number, embedding (list[float] or None).

    The INSERT does not name ``chunk_role`` or ``parent_chunk_id``, so rows
    take the column defaults — per the schema notes ``chunk_role='child'`` and
    ``parent_chunk_id IS NULL``, backward-compatible with the V17 schema
    (parent-doc lookup is a no-op for these rows). For two-tier ingestion, see
    :func:`store_precedent_chunks_hierarchical`.

    Returns ``len(chunks)``. An empty list still deletes the existing chunks.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        # Delete + re-insert must be all-or-nothing: the old chunks are gone
        # as soon as the DELETE runs, so a later INSERT failure has to undo it.
        async with conn.transaction():
            await conn.execute(
                "DELETE FROM precedent_chunks WHERE case_law_id = $1",
                case_law_id,
            )
            for c in chunks:
                await conn.execute(
                    """INSERT INTO precedent_chunks
                       (case_law_id, chunk_index, content, section_type,
                        page_number, embedding)
                       VALUES ($1, $2, $3, $4, $5, $6)""",
                    case_law_id,
                    c["chunk_index"],
                    c["content"],
                    c.get("section_type", "other"),
                    c.get("page_number"),
                    c.get("embedding"),
                )
    return len(chunks)


async def store_precedent_chunks_hierarchical(
    case_law_id: UUID,
    chunks: list[dict],
) -> dict:
    """Replace the precedent chunks of a case_law row (two-tier).

    Expected keys on every input dict:
        * ``role``: 'child' | 'parent' (any other value is ignored)
        * ``local_id``: in-batch integer identifier, read on parents to map
          them to the UUID the database assigns
        * ``parent_local_id``: on children, the ``local_id`` of their parent
          in this same batch; None (or absent) means "no parent"
        * ``chunk_index``, ``content``, ``section_type``, ``page_number``
        * ``embedding``: read for children only; parents are stored with NULL

    Everything happens in one transaction: the old chunks are deleted, the
    parents are inserted first (collecting ``local_id → DB UUID``), then the
    children are inserted with ``parent_chunk_id`` resolved through that map.
    A ``parent_local_id`` that matches no parent resolves to NULL.

    Returns ``{"parents": N, "children": M, "total": N+M}``. When the batch
    holds neither parents nor children nothing is touched in the database.
    """
    parent_items: list[dict] = []
    child_items: list[dict] = []
    for item in chunks:
        role = item.get("role")
        if role == "parent":
            parent_items.append(item)
        elif role == "child":
            child_items.append(item)

    summary = {
        "parents": len(parent_items),
        "children": len(child_items),
        "total": len(parent_items) + len(child_items),
    }
    if summary["total"] == 0:
        return summary

    pool = await get_pool()
    async with pool.acquire() as conn, conn.transaction():
        await conn.execute(
            "DELETE FROM precedent_chunks WHERE case_law_id = $1",
            case_law_id,
        )

        # Pass 1 — parents. Their embedding is deliberately NULL: parents are
        # never matched on, they only carry retrieval context.
        uuid_by_local_id: dict[int, UUID] = {}
        for parent in parent_items:
            inserted = await conn.fetchrow(
                """INSERT INTO precedent_chunks
                       (case_law_id, chunk_index, content, section_type,
                        page_number, embedding, chunk_role, parent_chunk_id)
                       VALUES ($1, $2, $3, $4, $5, NULL, 'parent', NULL)
                       RETURNING id""",
                case_law_id,
                parent["chunk_index"],
                parent["content"],
                parent.get("section_type", "other"),
                parent.get("page_number"),
            )
            uuid_by_local_id[int(parent["local_id"])] = inserted["id"]

        # Pass 2 — children, each pointing at its parent's database id.
        for child in child_items:
            parent_ref = child.get("parent_local_id")
            if parent_ref is None:
                parent_uuid = None
            else:
                parent_uuid = uuid_by_local_id.get(int(parent_ref))
            await conn.execute(
                """INSERT INTO precedent_chunks
                       (case_law_id, chunk_index, content, section_type,
                        page_number, embedding, chunk_role, parent_chunk_id)
                       VALUES ($1, $2, $3, $4, $5, $6, 'child', $7)""",
                case_law_id,
                child["chunk_index"],
                child["content"],
                child.get("section_type", "other"),
                child.get("page_number"),
                child.get("embedding"),
                parent_uuid,
            )
    return summary


async def list_precedent_chunks(
    case_law_id: UUID,
    section_types: tuple[str, ...] | None = None,
) -> list[dict]:
    """Return the chunks of one precedent as plain dicts, by ``chunk_index``.

    Each dict carries id, chunk_index, content, section_type, page_number and
    halacha_extracted_at. A non-empty ``section_types`` restricts the result
    to chunks whose ``section_type`` is one of the given values; ``None`` or an
    empty tuple returns every chunk.
    """
    pool = await get_pool()
    if not section_types:
        records = await pool.fetch(
            """SELECT id, chunk_index, content, section_type, page_number,
                      halacha_extracted_at
               FROM precedent_chunks
               WHERE case_law_id = $1
               ORDER BY chunk_index""",
            case_law_id,
        )
        return [dict(rec) for rec in records]

    wanted = list(section_types)
    records = await pool.fetch(
        """SELECT id, chunk_index, content, section_type, page_number,
                      halacha_extracted_at
               FROM precedent_chunks
               WHERE case_law_id = $1 AND section_type = ANY($2::text[])
               ORDER BY chunk_index""",
        case_law_id, wanted,
    )
    return [dict(rec) for rec in records]


async def delete_halachot(case_law_id: UUID) -> int:
    """Delete every halacha of a precedent and return how many rows went.

    The count is parsed from the command tag; 0 is returned if it cannot be
    parsed.
    """
    pool = await get_pool()
    status_tag = await pool.execute(
        "DELETE FROM halachot WHERE case_law_id = $1", case_law_id,
    )
    # The tag looks like "DELETE 5" — the row count is its last token.
    tokens = status_tag.split()
    try:
        removed = int(tokens[-1])
    except (ValueError, IndexError):
        removed = 0
    return removed


async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
    """Bulk-insert extracted halachot, all-or-nothing.

    The whole batch is written inside one transaction, so a failure on any
    item leaves no partial set behind (a retry would otherwise duplicate the
    rows that had already landed).

    Each halacha enters with review_status determined by extractor
    confidence vs ``config.HALACHA_AUTO_APPROVE_THRESHOLD``:
      - confidence >= threshold → 'approved' (visible to search immediately)
      - else → 'pending_review' (chair must approve manually)

    For auto-approved rows the reviewer is recorded as
    ``"auto-approved (confidence ≥ <threshold>)"`` and ``reviewed_at`` is set
    to ``now()``; otherwise both stay NULL.

    Args:
        case_law_id: Precedent the halachot belong to.
        halachot: Extracted items. ``rule_statement`` and ``supporting_quote``
            are required keys; the rest are optional and defaulted. Each row's
            ``halacha_index`` is the item's position in this list.

    Returns:
        ``len(halachot)`` (0 for an empty list, in which case the database is
        not touched).
    """
    if not halachot:
        return 0
    threshold = config.HALACHA_AUTO_APPROVE_THRESHOLD
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            for i, h in enumerate(halachot):
                confidence = float(h.get("confidence", 0.0))
                auto_approve = confidence >= threshold
                review_status = "approved" if auto_approve else "pending_review"
                reviewer = (
                    f"auto-approved (confidence ≥ {threshold:.2f})"
                    if auto_approve else None
                )
                # Only the fixed literals now() / NULL are spliced into the
                # SQL text — never caller-supplied data.
                reviewed_at_clause = "now()" if auto_approve else "NULL"
                await conn.execute(
                    f"""INSERT INTO halachot
                       (case_law_id, halacha_index, rule_statement, rule_type,
                        reasoning_summary, supporting_quote, page_reference,
                        practice_areas, subject_tags, cites, confidence,
                        quote_verified, embedding, review_status,
                        reviewer, reviewed_at)
                       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
                               $12, $13, $14, $15, {reviewed_at_clause})""",
                    case_law_id,
                    i,
                    h["rule_statement"],
                    h.get("rule_type", "binding"),
                    h.get("reasoning_summary", ""),
                    h["supporting_quote"],
                    h.get("page_reference", ""),
                    h.get("practice_areas", []),
                    h.get("subject_tags", []),
                    h.get("cites", []),
                    confidence,
                    h.get("quote_verified", False),
                    h.get("embedding"),
                    review_status,
                    reviewer,
                )
    return len(halachot)


async def reset_halacha_extraction(case_law_id: UUID) -> None:
    """Prepare a precedent for a clean halacha re-extraction.

    In one transaction, deletes all of the precedent's halachot and clears the
    ``halacha_extracted_at`` checkpoint on each of its chunks, so every chunk
    is processed again (used by explicit re-extract, not resume).
    """
    pool = await get_pool()
    async with pool.acquire() as conn, conn.transaction():
        await conn.execute("DELETE FROM halachot WHERE case_law_id = $1", case_law_id)
        clear_checkpoints = (
            "UPDATE precedent_chunks SET halacha_extracted_at = NULL "
            "WHERE case_law_id = $1"
        )
        await conn.execute(clear_checkpoints, case_law_id)


async def mark_all_chunks_extracted(case_law_id: UUID) -> int:
    """Stamp ``halacha_extracted_at = now()`` on every un-marked chunk.

    Backfill for pre-V25 precedents: their halachot already exist but no chunk
    was checkpointed, so without this a resume run would re-extract them and
    produce duplicates. Chunks that already carry a checkpoint are left alone.

    Returns the number of rows updated (0 if the command tag cannot be parsed).
    """
    pool = await get_pool()
    status_tag = await pool.execute(
        "UPDATE precedent_chunks SET halacha_extracted_at = now() "
        "WHERE case_law_id = $1 AND halacha_extracted_at IS NULL", case_law_id,
    )
    # The tag looks like "UPDATE 7" — the row count is its last token.
    tokens = status_tag.split()
    try:
        updated = int(tokens[-1])
    except (ValueError, IndexError):
        updated = 0
    return updated


async def store_halachot_for_chunk(
    case_law_id: UUID, chunk_id: UUID, halachot: list[dict],
) -> int:
    """Persist ONE chunk's halachot and mark the chunk done — atomically.

    Crash-safe + resumable: each chunk's results land in the DB the moment it
    finishes, and the chunk is flagged (``halacha_extracted_at``) so a resumed
    run skips it. ``halacha_index`` continues from the current max so appends
    across chunks never collide. The chunk is marked even when ``halachot`` is
    empty (so resume skips genuinely-empty chunks too). Caller serializes calls
    (a single in-process store-lock) so the MAX read stays race-free.

    Two gates encode the strict rubric (docs/halacha-strict-rubric.md) so the
    corpus stays clean at the source instead of accumulating noise:

    * Auto-approve gate — a halacha auto-approves only if confidence ≥ threshold
      AND it carries no ``quality_flags`` (non_decision / truncated_quote /
      thin_restatement / quote_unverified). Flagged items route to
      ``pending_review`` regardless of confidence.
    * Dedup-on-insert — within the SAME precedent, a halacha is skipped if its
      normalized ``supporting_quote`` already exists, or its rule-embedding has
      cosine ≥ ``HALACHA_DEDUP_COSINE`` against an already-stored halacha.

    Args:
        case_law_id: Precedent the halachot belong to; scopes both dedup checks
            and the ``halacha_index`` numbering.
        chunk_id: ``precedent_chunks`` row stamped with ``halacha_extracted_at``.
        halachot: Extracted items. ``rule_statement`` and ``supporting_quote``
            are required keys; the remaining keys are optional and defaulted.

    Returns the number of halachot actually INSERTED (after dedup skips).
    """
    threshold = config.HALACHA_AUTO_APPROVE_THRESHOLD
    dedup_distance = 1.0 - config.HALACHA_DEDUP_COSINE  # cosine sim → distance
    pool = await get_pool()
    inserted = 0
    skipped = 0
    async with pool.acquire() as conn:
        async with conn.transaction():
            # First free halacha_index for this precedent (0 when none exist).
            base = await conn.fetchval(
                "SELECT COALESCE(MAX(halacha_index), -1) + 1 FROM halachot "
                "WHERE case_law_id = $1", case_law_id,
            )
            # Existing normalized quotes for exact-dedup (incl. within-batch).
            existing_quotes = {
                halacha_quality.normalize_text(r["supporting_quote"])
                for r in await conn.fetch(
                    "SELECT supporting_quote FROM halachot WHERE case_law_id = $1",
                    case_law_id,
                )
            }
            for h in halachot:
                norm_quote = halacha_quality.normalize_text(h["supporting_quote"])
                # 1) exact normalized-quote duplicate within this precedent
                #    (a quote that normalizes to something falsy never matches)
                if norm_quote and norm_quote in existing_quotes:
                    skipped += 1
                    continue
                # 2) semantic near-duplicate (rule embedding cosine)
                #    Skipped when the item has no embedding, or when the
                #    configured similarity threshold is above 1.0.
                emb = h.get("embedding")
                if emb is not None and config.HALACHA_DEDUP_COSINE <= 1.0:
                    # `<=>` is pgvector's cosine-distance operator, hence the
                    # comparison against 1 - similarity.
                    dup = await conn.fetchval(
                        "SELECT 1 FROM halachot WHERE case_law_id = $1 "
                        "AND embedding IS NOT NULL AND (embedding <=> $2) <= $3 "
                        "LIMIT 1",
                        case_law_id, emb, dedup_distance,
                    )
                    if dup:
                        skipped += 1
                        continue

                confidence = float(h.get("confidence", 0.0))
                flags = h.get("quality_flags") or []
                # Any quality flag forces manual review, whatever the confidence.
                auto_approve = confidence >= threshold and not flags
                review_status = "approved" if auto_approve else "pending_review"
                reviewer = (
                    f"auto-approved (confidence ≥ {threshold:.2f})"
                    if auto_approve else None
                )
                # Only the fixed literals now() / NULL are spliced into the SQL
                # text below — never caller-supplied data.
                reviewed_at_clause = "now()" if auto_approve else "NULL"
                await conn.execute(
                    f"""INSERT INTO halachot
                       (case_law_id, halacha_index, rule_statement, rule_type,
                        reasoning_summary, supporting_quote, page_reference,
                        practice_areas, subject_tags, cites, confidence,
                        quote_verified, quality_flags, embedding, review_status,
                        reviewer, reviewed_at)
                       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
                               $12, $13, $14, $15, $16, {reviewed_at_clause})""",
                    case_law_id, base + inserted, h["rule_statement"],
                    h.get("rule_type", "binding"), h.get("reasoning_summary", ""),
                    h["supporting_quote"], h.get("page_reference", ""),
                    h.get("practice_areas", []), h.get("subject_tags", []),
                    h.get("cites", []), confidence, h.get("quote_verified", False),
                    flags, h.get("embedding"), review_status, reviewer,
                )
                # Remember the quote so later items of this batch dedup against it.
                existing_quotes.add(norm_quote)
                inserted += 1
            # Runs even when nothing was inserted, so a resume skips this chunk.
            await conn.execute(
                "UPDATE precedent_chunks SET halacha_extracted_at = now() "
                "WHERE id = $1", chunk_id,
            )
    # Only log when dedup actually dropped something.
    if skipped:
        logger.info(
            "store_halachot_for_chunk: case_law=%s chunk=%s — %d inserted, "
            "%d skipped as duplicates", case_law_id, chunk_id, inserted, skipped,
        )
    return inserted


async def list_halachot(
    case_law_id: UUID | None = None,
    review_status: str | None = None,
    practice_area: str | None = None,
    limit: int = 200,
    offset: int = 0,
) -> list[dict]:
    """List halachot with their precedent header and corroboration summary.

    All filters are optional and AND-ed together: ``case_law_id`` (applied
    when not None), ``review_status`` and ``practice_area`` (applied when
    non-empty; the latter matches membership in ``practice_areas``).

    Rows are ordered by precedent, then ``halacha_index``, and paged with
    ``limit`` / ``offset``. Each dict also carries the precedent's
    case_number / case_name / court / precedent_level, ``decision_date``
    rendered as an ISO string when present, plus ``corroboration_count`` and
    ``corroboration_negative`` aggregated from
    ``halacha_citation_corroboration`` (0 / false when there are no links).
    """
    pool = await get_pool()

    # (SQL template, bound value, is the filter active?)
    candidate_filters = (
        ("h.case_law_id = ${}", case_law_id, case_law_id is not None),
        ("h.review_status = ${}", review_status, bool(review_status)),
        ("${} = ANY(h.practice_areas)", practice_area, bool(practice_area)),
    )
    args: list = []
    clauses: list[str] = []
    for template, value, active in candidate_filters:
        if active:
            args.append(value)
            clauses.append(template.format(len(args)))

    where_sql = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    # limit / offset take the two placeholders right after the filters.
    limit_pos = len(args) + 1
    args.extend([limit, offset])
    sql = f"""
        SELECT h.id, h.case_law_id, h.halacha_index, h.rule_statement,
               h.rule_type, h.reasoning_summary, h.supporting_quote,
               h.page_reference, h.practice_areas, h.subject_tags,
               h.cites, h.confidence, h.quote_verified, h.quality_flags,
               h.review_status,
               h.reviewer, h.reviewed_at, h.created_at, h.updated_at,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level,
               COALESCE(cor.corroboration_count, 0)::int AS corroboration_count,
               COALESCE(cor.corroboration_negative, false) AS corroboration_negative
        FROM halachot h
        LEFT JOIN case_law cl ON cl.id = h.case_law_id
        LEFT JOIN (
            SELECT halacha_id,
                   count(DISTINCT COALESCE(citing_case_law_id::text,
                         citing_decision_id::text, source_citation_id::text))
                     FILTER (WHERE treatment IN ('followed','explained'))
                     AS corroboration_count,
                   bool_or(treatment IN
                     ('distinguished','criticized','questioned','overruled'))
                     AS corroboration_negative
            FROM halacha_citation_corroboration
            GROUP BY halacha_id
        ) cor ON cor.halacha_id = h.id
        {where_sql}
        ORDER BY h.case_law_id, h.halacha_index
        LIMIT ${limit_pos} OFFSET ${limit_pos + 1}
    """
    records = await pool.fetch(sql, *args)

    results = []
    for rec in records:
        item = dict(rec)
        decided = item.get("decision_date")
        if decided is not None:
            item["decision_date"] = decided.isoformat()
        results.append(item)
    return results


async def update_halacha(
    halacha_id: UUID,
    review_status: str | None = None,
    reviewer: str = "",
    rule_statement: str | None = None,
    reasoning_summary: str | None = None,
    subject_tags: list[str] | None = None,
    practice_areas: list[str] | None = None,
) -> dict | None:
    """Update a halacha — used by the chair to approve/reject/edit.

    Only arguments that are not None are written. Setting ``review_status`` to
    'approved', 'rejected', 'published' or 'deferred' additionally stamps
    ``reviewed_at = now()`` and stores ``reviewer``. ``updated_at`` is bumped
    on every write.

    Returns the updated row as a dict (without ``embedding``), or None when
    there was nothing to update or no row has this id.
    """
    pool = await get_pool()
    args: list = [halacha_id]
    assignments: list[str] = []

    def assign(column: str, value) -> None:
        # Bind the value to the next free placeholder ($1 is the row id).
        args.append(value)
        assignments.append(f"{column} = ${len(args)}")

    if review_status is not None:
        assign("review_status", review_status)
        if review_status in ("approved", "rejected", "published", "deferred"):
            assignments.append("reviewed_at = now()")
            assign("reviewer", reviewer)

    content_edits = (
        ("rule_statement", rule_statement),
        ("reasoning_summary", reasoning_summary),
        ("subject_tags", subject_tags),
        ("practice_areas", practice_areas),
    )
    for column, value in content_edits:
        if value is not None:
            assign(column, value)

    if not assignments:
        return None
    assignments.append("updated_at = now()")

    # `embedding` is left out of RETURNING on purpose — it's a numpy.ndarray of
    # np.float32 that breaks FastAPI's jsonable_encoder downstream
    # (PATCH /api/halachot/{id}). Callers that need it (none today) can
    # re-fetch with get_halacha.
    sql = f"""
        UPDATE halachot SET {', '.join(assignments)} WHERE id = $1
        RETURNING id, case_law_id, halacha_index, rule_statement, rule_type,
                  reasoning_summary, supporting_quote, page_reference,
                  practice_areas, subject_tags, cites, confidence,
                  quote_verified, quality_flags, review_status, reviewer,
                  reviewed_at, created_at, updated_at
    """
    updated = await pool.fetchrow(sql, *args)
    if updated:
        return dict(updated)
    return None


# Statuses the chair can set via review (batch or single). 'deferred' = snooze:
# stays out of the active library AND out of the default pending queue, without
# the finality of 'rejected'. #84 review-queue triage.
# `update_halachot_batch` treats any status outside this set as a no-op
# (returns 0 without touching the database).
HALACHA_REVIEW_STATUSES = {
    "pending_review", "approved", "rejected", "published", "deferred",
}


async def update_halachot_batch(
    halacha_ids: list[str], review_status: str, reviewer: str = "",
) -> int:
    """Set ``review_status`` on many halachot with a single UPDATE statement.

    Backs the #84 "approve/reject/defer the whole group" action — one request,
    one statement, one refetch instead of N PATCH round-trips. Only
    ``review_status`` and ``updated_at`` are written, plus ``reviewed_at`` /
    ``reviewer`` when the new status is 'approved', 'rejected', 'published' or
    'deferred'. Content is never edited in batch.

    Returns the number of rows updated. An empty id list or a status outside
    ``HALACHA_REVIEW_STATUSES`` returns 0 without touching the database.
    Raises ValueError if an id is not a valid UUID string.
    """
    if not halacha_ids:
        return 0
    if review_status not in HALACHA_REVIEW_STATUSES:
        return 0

    target_ids = [UUID(str(raw_id)) for raw_id in halacha_ids]
    args: list = [target_ids, review_status]
    if review_status in ("approved", "rejected", "published", "deferred"):
        # A review decision also records who made it and when ($3 = reviewer).
        stamp_clause = ", reviewed_at = now(), reviewer = $3"
        args.append(reviewer)
    else:
        stamp_clause = ""

    pool = await get_pool()
    status_tag = await pool.execute(
        f"""UPDATE halachot
            SET review_status = $2,
                updated_at = now()
                {stamp_clause}
            WHERE id = ANY($1::uuid[])""",
        *args,
    )
    # The tag looks like "UPDATE 12" — the row count is its last token.
    tokens = status_tag.split()
    try:
        return int(tokens[-1])
    except (ValueError, IndexError):
        return 0


async def approve_halacha_by_corroboration(
    halacha_id: UUID, n_sources: int, min_cites: int,
) -> bool:
    """Approve a halacha on the strength of citation corroboration.

    The UPDATE is conditional on the row currently being ``pending_review``,
    so ``published`` / ``rejected`` / already-``approved`` rows are never
    touched (INV-COR5: the chair gate is preserved for everything else). The
    corroboration basis — ``n_sources`` against the ``min_cites`` bar — is
    written into ``reviewer`` as provenance (INV-COR6).

    Returns True iff a row actually transitioned.
    """
    pool = await get_pool()
    provenance = f"corroborated ({n_sources} judicial citations ≥ {min_cites})"
    sql = (
        "UPDATE halachot SET review_status='approved', reviewer=$2, "
        "reviewed_at=now(), updated_at=now() "
        "WHERE id=$1 AND review_status='pending_review' RETURNING id"
    )
    transitioned = await pool.fetchrow(sql, halacha_id, provenance)
    # No returned row means the halacha was missing or not pending_review.
    return transitioned is not None


async def demote_halacha_overruled(halacha_id: UUID) -> bool:
    """Send an APPROVED halacha back to the chair gate after an overruling.

    Used when a later citing court overruled it (INV-COR2). The UPDATE is
    conditional on the row currently being ``approved``: it moves to
    ``pending_review``, ``reviewed_at`` is cleared and ``reviewer`` is
    replaced by a note recording why the row re-entered the queue.
    ``published`` / ``rejected`` / already-``pending_review`` rows are left
    untouched.

    Returns True iff a row transitioned.
    """
    pool = await get_pool()
    sql = (
        "UPDATE halachot SET review_status='pending_review', "
        "reviewer='flagged: overruled by later citation (X11)', "
        "reviewed_at=NULL, updated_at=now() "
        "WHERE id=$1 AND review_status='approved' RETURNING id"
    )
    transitioned = await pool.fetchrow(sql, halacha_id)
    # No returned row means the halacha was missing or not approved.
    return transitioned is not None


async def list_corroboration_grouped(case_law_id: UUID) -> dict[str, list[dict]]:
    """Group the corroboration links of a cited precedent by halacha.

    Each link is a ``{source_id, treatment}`` dict — the shape ``aggregate()``
    consumes. ``source_id`` is the first non-NULL of the citing case_law id,
    the citing decision id and the citation row id, so two rows without a
    citing case/decision are not collapsed into one source.

    Returns:
        Mapping of halacha id (as text) to its list of links.
    """
    sql = (
        "SELECT hcc.halacha_id::text AS halacha_id, "
        "  COALESCE(hcc.citing_case_law_id::text, hcc.citing_decision_id::text, "
        "           hcc.source_citation_id::text) AS source_id, "
        "  hcc.treatment "
        "FROM halacha_citation_corroboration hcc "
        "JOIN halachot h ON h.id = hcc.halacha_id "
        "WHERE h.case_law_id = $1"
    )
    pool = await get_pool()
    records = await pool.fetch(sql, case_law_id)

    grouped: dict[str, list[dict]] = {}
    for rec in records:
        link = {"source_id": rec["source_id"], "treatment": rec["treatment"]}
        key = rec["halacha_id"]
        if key in grouped:
            grouped[key].append(link)
        else:
            grouped[key] = [link]
    return grouped


async def precedents_with_halachot_and_incoming_citations() -> list[str]:
    """Return the corroboration backfill target set.

    A case_law row qualifies when it has at least one halacha AND at least one
    incoming citation in either graph (``precedent_internal_citations`` or
    ``case_law_citations``).

    Returns:
        The qualifying case_law ids, as text.
    """
    sql = (
        "SELECT c.id::text FROM case_law c "
        "WHERE EXISTS (SELECT 1 FROM halachot h WHERE h.case_law_id=c.id) "
        "  AND (EXISTS (SELECT 1 FROM precedent_internal_citations p "
        "               WHERE p.cited_case_law_id=c.id) "
        "    OR EXISTS (SELECT 1 FROM case_law_citations cc "
        "               WHERE cc.case_law_id=c.id))"
    )
    pool = await get_pool()
    records = await pool.fetch(sql)
    return [rec["id"] for rec in records]


async def nearest_halacha_for_vector(case_law_id: UUID, vec: list[float]) -> tuple[str, float] | None:
    """Find the halacha of `case_law_id` closest to a context embedding (cosine).

    Only halachot with a stored embedding are considered.

    Returns:
        ``(halacha_id, similarity)`` for the best match, where similarity is
        ``1 - cosine distance``; ``None`` when no row matched.
    """
    sql = (
        "SELECT id::text AS id, 1 - (embedding <=> $2) AS sim "
        "FROM halachot WHERE case_law_id = $1 AND embedding IS NOT NULL "
        "ORDER BY embedding <=> $2 LIMIT 1"
    )
    pool = await get_pool()
    best = await pool.fetchrow(sql, case_law_id, vec)
    if not best:
        return None
    return best["id"], float(best["sim"])


async def incoming_citations_for_precedent(case_law_id: UUID) -> list[dict]:
    """Collect every incoming citation of a precedent from both citation graphs.

    Rows from ``precedent_internal_citations`` carry ``citing_case_law_id``;
    rows from ``case_law_citations`` carry ``citing_decision_id``. The other
    id column is NULL in each branch of the UNION ALL.

    Returns:
        One dict per citation with keys ``source_id``, ``citing_case_law_id``,
        ``citing_decision_id`` and ``context``.
    """
    sql = (
        "SELECT id::text AS source_id, source_case_law_id::text AS citing_case_law_id, "
        "       NULL::text AS citing_decision_id, match_context AS context "
        "FROM precedent_internal_citations WHERE cited_case_law_id = $1 "
        "UNION ALL "
        "SELECT id::text, NULL, decision_id::text, context_text "
        "FROM case_law_citations WHERE case_law_id = $1"
    )
    pool = await get_pool()
    records = await pool.fetch(sql, case_law_id)
    return list(map(dict, records))


async def store_corroboration(
    halacha_id: str,
    source_id: str,
    citing_case_law_id,
    citing_decision_id,
    treatment: str,
    score: float,
    context: str,
) -> None:
    """Upsert one halacha ↔ citing-source corroboration link.

    String ids are converted to ``UUID`` before binding (asyncpg requires UUID
    objects for uuid-typed columns). On a conflict on
    ``(halacha_id, source_citation_id)`` only ``treatment`` and ``match_score``
    are refreshed; the remaining columns keep their stored values.
    """

    def _required_uuid(value):
        # Any string is parsed; non-strings (e.g. UUID objects) pass through.
        if isinstance(value, str):
            return UUID(value)
        return value

    def _optional_uuid(value):
        # Only non-empty strings are parsed; None / falsy / non-str pass through.
        if value and isinstance(value, str):
            return UUID(value)
        return value

    pool = await get_pool()
    bound_args = (
        _required_uuid(halacha_id),
        _optional_uuid(citing_case_law_id),
        _optional_uuid(citing_decision_id),
        _required_uuid(source_id),
        treatment,
        score,
        context,
    )
    await pool.execute(
        "INSERT INTO halacha_citation_corroboration "
        "(halacha_id, citing_case_law_id, citing_decision_id, source_citation_id, treatment, match_score, match_context) "
        "VALUES ($1,$2,$3,$4,$5,$6,$7) "
        "ON CONFLICT (halacha_id, source_citation_id) DO UPDATE SET "
        "treatment=EXCLUDED.treatment, match_score=EXCLUDED.match_score",
        *bound_args,
    )


async def list_corroboration_for_halacha(halacha_id: UUID) -> list[dict]:
    """List the corroboration rows of a single halacha, best match first.

    Rows are ordered by ``match_score`` descending. ``match_score`` is returned
    as a float (or None) and ``created_at`` as an ISO-8601 string (or None).
    """
    pool = await get_pool()
    records = await pool.fetch(
        "SELECT treatment, match_score, match_context, citing_case_law_id::text, "
        "       citing_decision_id::text, created_at "
        "FROM halacha_citation_corroboration WHERE halacha_id = $1 "
        "ORDER BY match_score DESC", halacha_id,
    )

    links: list[dict] = []
    for rec in records:
        raw_score = rec["match_score"]
        created = rec["created_at"]
        links.append(
            {
                "treatment": rec["treatment"],
                "match_score": None if raw_score is None else float(raw_score),
                "match_context": rec["match_context"],
                "citing_case_law_id": rec["citing_case_law_id"],
                "citing_decision_id": rec["citing_decision_id"],
                "created_at": created.isoformat() if created else None,
            }
        )
    return links


async def search_precedent_library_semantic(
    query_embedding: list[float],
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
    source_kind: str = "external_upload",
    district: str = "",
    chair_name: str = "",
) -> list[dict]:
    """Semantic search over precedents filtered by source_kind.

    source_kind='external_upload'  → court rulings (default)
    source_kind='internal_committee' → appeals-committee decisions

    Returns merged halachot + chunks. Halachot are pre-distilled rules, so
    they get a small score boost. Only ``approved`` / ``published`` halachot
    are visible (per chair-review policy). Chunks are visible regardless
    of halacha review status.

    Every caller-supplied filter value — including ``source_kind`` — is sent
    as a bound query parameter; none is interpolated into the SQL text.

    Args:
        query_embedding: Query vector compared against stored embeddings
            with the cosine-distance operator.
        practice_area, court, precedent_level, appeal_subtype, district,
            chair_name: Optional filters; empty string means "no filter".
            ``court`` is a case-insensitive substring match.
        is_binding: Optional filter; ``None`` means "no filter".
        subject_tag: Optional filter applied to halachot only.
        limit: Row cap for each of the two queries and for the merged result.
        include_halachot: When False only passage chunks are searched.
        source_kind: Value matched against ``case_law.source_kind``.

    Returns:
        Result dicts sorted by ``score`` descending; each has ``type`` set to
        ``"halacha"`` or ``"passage"``.
    """
    pool = await get_pool()

    # $1 = query embedding, $2 = limit, $3 = source_kind. Optional filters
    # are appended from $4 onwards. source_kind is bound rather than spliced
    # into the statement so a caller-supplied value cannot alter the SQL.
    halacha_filters = [
        "h.review_status IN ('approved', 'published')",
        "cl.source_kind = $3",
        "cl.searchable = true",
    ]
    chunk_filters = ["cl.source_kind = $3", "cl.searchable = true"]
    h_params: list = [query_embedding, limit, source_kind]
    c_params: list = [query_embedding, limit, source_kind]

    def _bind(value, halacha_clause: str | None, chunk_clause: str | None) -> None:
        """Bind ``value`` and append the clause(s); ``{n}`` in a clause is
        replaced by the positional index the value received in that query."""
        if halacha_clause is not None:
            h_params.append(value)
            halacha_filters.append(halacha_clause.format(n=len(h_params)))
        if chunk_clause is not None:
            c_params.append(value)
            chunk_filters.append(chunk_clause.format(n=len(c_params)))

    if practice_area:
        # Halachot carry an array of practice areas; case_law a single value.
        _bind(practice_area, "${n} = ANY(h.practice_areas)", "cl.practice_area = ${n}")
    if court:
        _bind(f"%{court}%", "cl.court ILIKE ${n}", "cl.court ILIKE ${n}")
    if precedent_level:
        _bind(precedent_level, "cl.precedent_level = ${n}", "cl.precedent_level = ${n}")
    if appeal_subtype:
        _bind(appeal_subtype, "cl.appeal_subtype = ${n}", "cl.appeal_subtype = ${n}")
    if is_binding is not None:
        _bind(is_binding, "cl.is_binding = ${n}", "cl.is_binding = ${n}")
    if subject_tag:
        # subject_tags exist on halachot only — no chunk-side clause.
        _bind(subject_tag, "${n} = ANY(h.subject_tags)", None)
    if district:
        _bind(district, "cl.district = ${n}", "cl.district = ${n}")
    if chair_name:
        _bind(chair_name, "cl.chair_name = ${n}", "cl.chair_name = ${n}")

    halacha_sql = f"""
        SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
               h.reasoning_summary, h.supporting_quote, h.page_reference,
               h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.chair_name, cl.district,
               1 - (h.embedding <=> $1) AS score
        FROM halachot h
        JOIN case_law cl ON cl.id = h.case_law_id
        WHERE {' AND '.join(halacha_filters)}
          AND h.embedding IS NOT NULL
        ORDER BY h.embedding <=> $1
        LIMIT $2
    """

    # Parent-doc retrieval (V17 / TaskMaster #48): the LEFT JOIN
    # surfaces each chunk's parent_chunk's content alongside it. When
    # ``config.PARENT_DOC_RETRIEVAL_ENABLED`` is true *and* the row has
    # a non-null parent, the post-processing loop swaps in the parent's
    # content so the writer sees the broader passage instead of the
    # 300-token sliver that matched. Legacy rows (parent_chunk_id NULL)
    # are unaffected — the JOIN returns NULL parent_* and the swap is a
    # no-op. Index ``idx_precedent_chunks_role`` is not used here
    # intentionally: filtering on chunk_role='child' would exclude
    # legacy single-tier rows that default to 'child' but have no
    # parent; an embedding-IS-NOT-NULL filter is equivalent because
    # parents store NULL embeddings.
    chunk_sql = f"""
        SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
               pc.section_type, pc.page_number,
               pc.parent_chunk_id,
               parent.content AS parent_content,
               parent.section_type AS parent_section_type,
               parent.page_number AS parent_page_number,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.practice_area, cl.chair_name, cl.district,
               1 - (pc.embedding <=> $1) AS score
        FROM precedent_chunks pc
        JOIN case_law cl ON cl.id = pc.case_law_id
        LEFT JOIN precedent_chunks parent
            ON parent.id = pc.parent_chunk_id
        WHERE {' AND '.join(chunk_filters)}
          AND pc.embedding IS NOT NULL
          -- #55: exclude tiny fragment chunks (artifacts of pre-fix
          -- mid-sentence header splits) that carry no retrievable signal.
          AND length(trim(pc.content)) >= 50
        ORDER BY pc.embedding <=> $1
        LIMIT $2
    """

    results: list[dict] = []
    if include_halachot:
        rows = await pool.fetch(halacha_sql, *h_params)
        for r in rows:
            d = dict(r)
            if d.get("decision_date") is not None:
                d["decision_date"] = d["decision_date"].isoformat()
            # Dynamic rule-level boost: scales with extractor confidence
            # so high-conf halachot rank higher than low-conf ones.
            # conf=0.78 → +0.047, conf=0.90 → +0.054, conf=0.95 → +0.057
            # Calibrated so the average (≈0.85) stays at +0.05 (legacy value).
            _conf = float(d.get("confidence") or 0.0)
            d["score"] = float(d["score"]) + max(_conf * 0.06, 0.0)
            d["type"] = "halacha"
            results.append(d)

    rows = await pool.fetch(chunk_sql, *c_params)
    for r in rows:
        d = dict(r)
        if d.get("decision_date") is not None:
            d["decision_date"] = d["decision_date"].isoformat()
        d["score"] = float(d["score"])
        d["type"] = "passage"
        _maybe_swap_parent(d)
        results.append(d)

    results.sort(key=lambda x: x["score"], reverse=True)
    # Dedupe: when multiple child hits share the same parent, we'd
    # otherwise return duplicate parent content. Keep the highest-
    # scoring hit per parent (skip if parent swap disabled or row has
    # no parent — chunk_id alone remains unique).
    return _dedupe_by_parent(results, limit)


def _maybe_swap_parent(row: dict) -> None:
    """Replace a chunk row's ``content`` with its parent's, in place.

    The ``parent_content`` / ``parent_section_type`` / ``parent_page_number``
    keys produced by the LEFT JOIN are always removed from ``row``. The swap
    itself happens only when ``config.PARENT_DOC_RETRIEVAL_ENABLED`` is on,
    ``parent_chunk_id`` is not None and the parent content is non-empty.

    On a swap the original values are kept under ``child_content`` /
    ``child_section_type`` / ``child_page_number`` so callers can see what
    originally matched, and ``parent_swap`` is set to True.
    """
    promoted_content = row.pop("parent_content", None)
    promoted_section = row.pop("parent_section_type", None)
    promoted_page = row.pop("parent_page_number", None)

    # Guard clauses: bail out unless every precondition for the swap holds.
    if not config.PARENT_DOC_RETRIEVAL_ENABLED:
        return
    if row.get("parent_chunk_id") is None:
        return
    if not promoted_content:
        return

    # Preserve what actually matched before overwriting it.
    row.update(
        child_content=row.get("content"),
        child_section_type=row.get("section_type"),
        child_page_number=row.get("page_number"),
    )
    row["content"] = promoted_content
    # Parent's section_type is authoritative for the swapped row
    # (children inherit from their parent, but a parent that spans
    # a boundary uses its first section's type — same convention).
    if promoted_section:
        row["section_type"] = promoted_section
    if promoted_page is not None:
        row["page_number"] = promoted_page
    row["parent_swap"] = True


def _dedupe_by_parent(rows: list[dict], limit: int) -> list[dict]:
    """Collapse swapped rows that share a parent and cap the result at ``limit``.

    When ``config.PARENT_DOC_RETRIEVAL_ENABLED`` is off this is simply
    ``rows[:limit]``. When it is on, rows that were parent-swapped
    (``parent_swap`` truthy and a truthy ``parent_chunk_id``) are deduplicated
    by parent id: the first occurrence wins, so callers should pass ``rows``
    already sorted best-first. Rows without a parent (legacy chunks,
    halachot) pass through unchanged.

    Args:
        rows: Candidate result rows, in the order they should be preferred.
        limit: Maximum number of rows to return. With the flag on, a value
            of zero or less yields an empty list.

    Returns:
        A new list of at most ``limit`` rows.
    """
    if not config.PARENT_DOC_RETRIEVAL_ENABLED:
        return rows[:limit]
    seen_parents: set = set()
    out: list[dict] = []
    for r in rows:
        # Check the cap *before* appending: checking afterwards let one row
        # through even when limit was 0.
        if len(out) >= limit:
            break
        pid = r.get("parent_chunk_id")
        if pid and r.get("parent_swap"):
            if pid in seen_parents:
                continue
            seen_parents.add(pid)
        out.append(r)
    return out


async def search_precedent_library_lexical(
    *,
    query: str,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    source_kind: str = "external_upload",
    district: str = "",
    chair_name: str = "",
    limit: int = 30,
    include_halachot: bool = True,
) -> list[dict]:
    """Lexical (BM25-like) search via ``ts_rank_cd`` over ``content_tsv``
    and ``rule_tsv`` (V12 columns).

    Mirrors the filter set of :func:`search_precedent_library_semantic`
    so the two layers can be fused 1:1 by rank in
    :mod:`hybrid_search` via RRF.

    Why ``plainto_tsquery``: it accepts free-text input, lowercases, and
    AND-joins the terms — matches the bi-encoder's "all words contribute"
    assumption better than ``websearch_to_tsquery`` (which inserts ORs).
    Empty / stopword-only queries return zero rows (no error).

    Why ``ts_rank_cd``: cover density variant — rewards documents where
    the query terms appear close together (e.g. "1461/20 אנטרים" matches
    the same paragraph). Higher is more relevant.

    Every caller-supplied value — the query text, ``source_kind`` and all
    optional filters — is sent as a bound parameter; none is interpolated
    into the SQL text.

    Returns:
        Result dicts sorted by ``score`` descending, each tagged with
        ``type`` = ``"halacha"`` or ``"passage"``. A blank ``query`` returns
        ``[]`` without touching the database.
    """
    if not (query or "").strip():
        return []

    pool = await get_pool()

    # $1 = query, $2 = limit, $3 = source_kind. Optional filters append
    # starting at $4. source_kind is bound rather than spliced into the
    # statement so a caller-supplied value cannot alter the SQL.
    halacha_filters = [
        "h.review_status IN ('approved', 'published')",
        "cl.source_kind = $3",
        "cl.searchable = true",
    ]
    chunk_filters = ["cl.source_kind = $3", "cl.searchable = true"]
    h_params: list = [query, limit, source_kind]
    c_params: list = [query, limit, source_kind]

    def _bind(value, halacha_clause: str | None, chunk_clause: str | None) -> None:
        """Bind ``value`` and append the clause(s); ``{n}`` in a clause is
        replaced by the positional index the value received in that query."""
        if halacha_clause is not None:
            h_params.append(value)
            halacha_filters.append(halacha_clause.format(n=len(h_params)))
        if chunk_clause is not None:
            c_params.append(value)
            chunk_filters.append(chunk_clause.format(n=len(c_params)))

    if practice_area:
        # Halachot carry an array of practice areas; case_law a single value.
        _bind(practice_area, "${n} = ANY(h.practice_areas)", "cl.practice_area = ${n}")
    if court:
        _bind(f"%{court}%", "cl.court ILIKE ${n}", "cl.court ILIKE ${n}")
    if precedent_level:
        _bind(precedent_level, "cl.precedent_level = ${n}", "cl.precedent_level = ${n}")
    if appeal_subtype:
        _bind(appeal_subtype, "cl.appeal_subtype = ${n}", "cl.appeal_subtype = ${n}")
    if is_binding is not None:
        _bind(is_binding, "cl.is_binding = ${n}", "cl.is_binding = ${n}")
    if subject_tag:
        # subject_tags exist on halachot only — no chunk-side clause.
        _bind(subject_tag, "${n} = ANY(h.subject_tags)", None)
    if district:
        _bind(district, "cl.district = ${n}", "cl.district = ${n}")
    if chair_name:
        _bind(chair_name, "cl.chair_name = ${n}", "cl.chair_name = ${n}")

    halacha_sql = f"""
        SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
               h.reasoning_summary, h.supporting_quote, h.page_reference,
               h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.chair_name, cl.district,
               GREATEST(
                   ts_rank_cd(h.rule_tsv, plainto_tsquery('simple', $1)),
                   ts_rank_cd(cl.meta_tsv, plainto_tsquery('simple', $1))
               )
               + CASE WHEN cl.meta_tsv @@ plainto_tsquery('simple', $1)
                      THEN 1.0 ELSE 0.0 END AS score
        FROM halachot h
        JOIN case_law cl ON cl.id = h.case_law_id
        WHERE {' AND '.join(halacha_filters)}
          AND (h.rule_tsv @@ plainto_tsquery('simple', $1)
               OR cl.meta_tsv @@ plainto_tsquery('simple', $1))
        ORDER BY score DESC
        LIMIT $2
    """

    # Parent-doc retrieval (V17) — same LEFT JOIN strategy as the
    # semantic side. The tsvector match still runs over the child's
    # ``content_tsv``; only the *returned* content is promoted to the
    # parent when the flag is on and a parent exists. See
    # :func:`search_precedent_library_semantic` for the rationale.
    # We intentionally restrict matching to chunks with an embedding
    # (i.e. children + legacy single-tier rows). Hierarchical parents
    # store NULL embeddings, so even though their ``content_tsv`` is
    # populated they're excluded here — preventing a parent from
    # matching directly and then being "swapped" with itself.
    chunk_sql = f"""
        SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
               pc.section_type, pc.page_number,
               pc.parent_chunk_id,
               parent.content AS parent_content,
               parent.section_type AS parent_section_type,
               parent.page_number AS parent_page_number,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.practice_area, cl.chair_name, cl.district,
               GREATEST(
                   ts_rank_cd(pc.content_tsv, plainto_tsquery('simple', $1)),
                   ts_rank_cd(cl.meta_tsv, plainto_tsquery('simple', $1))
               )
               + CASE WHEN cl.meta_tsv @@ plainto_tsquery('simple', $1)
                      THEN 1.0 ELSE 0.0 END AS score
        FROM precedent_chunks pc
        JOIN case_law cl ON cl.id = pc.case_law_id
        LEFT JOIN precedent_chunks parent
            ON parent.id = pc.parent_chunk_id
        WHERE {' AND '.join(chunk_filters)}
          AND pc.embedding IS NOT NULL
          -- #55: exclude tiny fragment chunks (see semantic query above).
          AND length(trim(pc.content)) >= 50
          AND (pc.content_tsv @@ plainto_tsquery('simple', $1)
               OR cl.meta_tsv @@ plainto_tsquery('simple', $1))
        ORDER BY score DESC
        LIMIT $2
    """

    results: list[dict] = []
    if include_halachot:
        rows = await pool.fetch(halacha_sql, *h_params)
        for r in rows:
            d = dict(r)
            if d.get("decision_date") is not None:
                d["decision_date"] = d["decision_date"].isoformat()
            d["score"] = float(d["score"])
            d["type"] = "halacha"
            results.append(d)

    rows = await pool.fetch(chunk_sql, *c_params)
    for r in rows:
        d = dict(r)
        if d.get("decision_date") is not None:
            d["decision_date"] = d["decision_date"].isoformat()
        d["score"] = float(d["score"])
        d["type"] = "passage"
        _maybe_swap_parent(d)
        results.append(d)

    results.sort(key=lambda x: x["score"], reverse=True)
    return _dedupe_by_parent(results, limit)


async def precedent_library_stats() -> dict:
    """Collect the aggregate counts shown on the /precedents stats tab.

    All queries run on a single pooled connection. Returns the total number
    of case_law rows, per-practice-area and per-precedent-level breakdowns
    (largest group first), and halachot totals: overall, ``pending_review``,
    and ``approved`` + ``published`` combined.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        precedent_count = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law"
        )
        practice_rows = await conn.fetch(
            """SELECT practice_area, COUNT(*) AS n
               FROM case_law
               GROUP BY practice_area
               ORDER BY n DESC"""
        )
        level_rows = await conn.fetch(
            """SELECT precedent_level, COUNT(*) AS n
               FROM case_law
               GROUP BY precedent_level
               ORDER BY n DESC"""
        )
        halachot_count = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot"
        )
        pending_count = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review'"
        )
        approved_count = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot WHERE review_status IN ('approved', 'published')"
        )

    practice_breakdown = [
        {"practice_area": rec["practice_area"], "count": int(rec["n"])}
        for rec in practice_rows
    ]
    level_breakdown = [
        {"precedent_level": rec["precedent_level"], "count": int(rec["n"])}
        for rec in level_rows
    ]
    # A falsy count (e.g. None) is reported as 0.
    return dict(
        precedents_total=int(precedent_count or 0),
        by_practice_area=practice_breakdown,
        by_precedent_level=level_breakdown,
        halachot_total=int(halachot_count or 0),
        halachot_pending=int(pending_count or 0),
        halachot_approved=int(approved_count or 0),
    )


# ── V8: extraction request queue helpers ─────────────────────────


async def request_metadata_extraction(case_law_id: UUID) -> bool:
    """Queue a case_law row for metadata extraction by the local MCP worker.

    Sets ``metadata_extraction_requested_at`` to ``now()`` and resets
    ``metadata_extraction_status`` to ``'pending'``.

    Originally restricted to ``source_kind='external_upload'`` (see git
    blame). Opened to all source kinds 2026-05-06 — internal_committee
    rows can also need re-extraction (e.g. corrupted subject_tags from
    an early ingest pipeline). The extractor itself preserves user
    values (``precedent_metadata_extractor.extract_and_apply`` only
    fills empty fields), so this is safe.

    Returns:
        True when exactly one row was updated; False if the row is missing.
    """
    # The status is reset together with the timestamp so that a re-request
    # after an earlier 'completed'/'failed' run shows "בתור" in the UI again
    # rather than a stale terminal badge.
    sql = (
        "UPDATE case_law SET metadata_extraction_requested_at = now(), "
        "metadata_extraction_status = 'pending' "
        "WHERE id = $1"
    )
    pool = await get_pool()
    command_tag = await pool.execute(sql, case_law_id)
    return command_tag == "UPDATE 1"


async def request_halacha_extraction(case_law_id: UUID) -> bool:
    """Queue a case_law row for halacha extraction.

    Sets ``halacha_extraction_requested_at`` to ``now()``. See the note on
    :func:`request_metadata_extraction` re: opening to all source kinds.

    Returns:
        True when exactly one row was updated, False otherwise.
    """
    sql = (
        "UPDATE case_law SET halacha_extraction_requested_at = now() "
        "WHERE id = $1"
    )
    pool = await get_pool()
    command_tag = await pool.execute(sql, case_law_id)
    return command_tag == "UPDATE 1"


async def list_pending_extraction_requests(
    kind: str = "metadata",  # 'metadata' | 'halacha'
    limit: int = 20,
) -> list[dict]:
    """List case_law rows with an outstanding extraction request, oldest first.

    The MCP worker drains the queue in order: process → clear timestamp.

    Args:
        kind: ``"metadata"`` selects the metadata request column; any other
            value selects the halacha request column.
        limit: Maximum number of rows returned.

    Returns:
        One dict per row; ``date`` and ``requested_at`` are ISO-8601 strings
        when present.
    """
    if kind == "metadata":
        column = "metadata_extraction_requested_at"
    else:
        column = "halacha_extraction_requested_at"

    pool = await get_pool()
    # No ``source_kind = 'external_upload'`` filter here: the request_*
    # functions accept every source kind, and with the legacy filter in place
    # stamped internal_committee rows stayed invisible to the worker forever.
    records = await pool.fetch(
        f"""SELECT id, case_number, case_name, court, date,
                  practice_area, is_binding, {column} AS requested_at
            FROM case_law
            WHERE {column} IS NOT NULL
            ORDER BY {column} ASC
            LIMIT $1""",
        limit,
    )

    pending: list[dict] = []
    for rec in records:
        item = dict(rec)
        # Convert the date/timestamp fields to ISO strings when present.
        for key in ("date", "requested_at"):
            value = item.get(key)
            if value is not None:
                item[key] = value.isoformat()
        pending.append(item)
    return pending


async def extraction_queue_status() -> dict:
    """Report the pending-extraction queue depth per kind (INV-TOOL4 visibility / GAP-45).

    Surfaces the otherwise-hidden queue that ``process_pending_extractions``
    drains: for metadata and halacha separately, how many case_law rows still
    carry an extraction request and the timestamp of the oldest one.
    Read-only — nothing is drained.

    Returns:
        ``{"metadata": {...}, "halacha": {...}}`` where each value has
        ``pending`` (row count) and ``oldest_request`` (ISO string or None).
    """

    def _summarize(rec) -> dict:
        first_requested = rec["oldest"]
        if first_requested:
            oldest_iso = first_requested.isoformat()
        else:
            oldest_iso = None
        return {"pending": rec["n"], "oldest_request": oldest_iso}

    pool = await get_pool()
    async with pool.acquire() as conn:
        metadata_row = await conn.fetchrow(
            "SELECT COUNT(*) AS n, MIN(metadata_extraction_requested_at) AS oldest "
            "FROM case_law WHERE metadata_extraction_requested_at IS NOT NULL"
        )
        halacha_row = await conn.fetchrow(
            "SELECT COUNT(*) AS n, MIN(halacha_extraction_requested_at) AS oldest "
            "FROM case_law WHERE halacha_extraction_requested_at IS NOT NULL"
        )

    return {"metadata": _summarize(metadata_row), "halacha": _summarize(halacha_row)}


async def clear_extraction_request(
    case_law_id: UUID, kind: str = "metadata",
) -> None:
    """Clear the extraction-request timestamp of one case_law row.

    Args:
        case_law_id: Row whose request marker is reset to NULL.
        kind: ``"metadata"`` clears ``metadata_extraction_requested_at``; any
            other value clears ``halacha_extraction_requested_at``.
    """
    if kind == "metadata":
        column = "metadata_extraction_requested_at"
    else:
        column = "halacha_extraction_requested_at"
    pool = await get_pool()
    # ``column`` is one of the two fixed names above, never caller text.
    sql = f"UPDATE case_law SET {column} = NULL WHERE id = $1"
    await pool.execute(sql, case_law_id)


# ── V9: Multimodal page image embeddings ─────────────────────────


async def store_document_image_embeddings(
    document_id: UUID,
    case_id: UUID | None,
    page_records: list[dict],
    model_name: str = "voyage-multimodal-3",
) -> int:
    """Atomically replace the per-page image embeddings of a document.

    Each ``page_records`` entry: ``{page_number, embedding, image_thumbnail_path}``.
    Embeddings should already be 1024-dim lists (or None for skipped pages).

    The DELETE of the old rows and the INSERT of the new ones run inside a
    single transaction, so a failure part-way through leaves the previously
    stored embeddings intact instead of an empty or partial set.

    Args:
        document_id: Document whose embeddings are replaced.
        case_id: Owning case id, stored on every row (may be None).
        page_records: New per-page records; ``page_number`` is required,
            ``embedding`` and ``image_thumbnail_path`` are optional.
        model_name: Name of the embedding model, stored on every row.

    Returns:
        The number of page records written.

    Raises:
        KeyError: If a record lacks ``page_number`` (raised before any row
            is deleted).
    """
    # Materialise the argument tuples first so a malformed record fails
    # before the existing rows are touched.
    insert_args = [
        (
            document_id,
            case_id,
            record["page_number"],
            record.get("embedding"),
            record.get("image_thumbnail_path"),
            model_name,
        )
        for record in page_records
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(
                "DELETE FROM document_image_embeddings WHERE document_id = $1",
                document_id,
            )
            if insert_args:
                await conn.executemany(
                    """INSERT INTO document_image_embeddings
                       (document_id, case_id, page_number, embedding,
                        image_thumbnail_path, model_name)
                       VALUES ($1, $2, $3, $4, $5, $6)""",
                    insert_args,
                )
    return len(insert_args)


async def store_precedent_image_embeddings(
    case_law_id: UUID,
    page_records: list[dict],
    model_name: str = "voyage-multimodal-3",
) -> int:
    """Atomically replace the per-page image embeddings of a precedent.

    Each ``page_records`` entry is a dict with a required ``page_number`` and
    optional ``embedding`` / ``image_thumbnail_path`` (missing values are
    stored as NULL).

    The DELETE of the old rows and the INSERT of the new ones run inside a
    single transaction, so a failure part-way through leaves the previously
    stored embeddings intact instead of an empty or partial set.

    Args:
        case_law_id: Precedent whose embeddings are replaced.
        page_records: New per-page records.
        model_name: Name of the embedding model, stored on every row.

    Returns:
        The number of page records written.

    Raises:
        KeyError: If a record lacks ``page_number`` (raised before any row
            is deleted).
    """
    # Materialise the argument tuples first so a malformed record fails
    # before the existing rows are touched.
    insert_args = [
        (
            case_law_id,
            record["page_number"],
            record.get("embedding"),
            record.get("image_thumbnail_path"),
            model_name,
        )
        for record in page_records
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(
                "DELETE FROM precedent_image_embeddings WHERE case_law_id = $1",
                case_law_id,
            )
            if insert_args:
                await conn.executemany(
                    """INSERT INTO precedent_image_embeddings
                       (case_law_id, page_number, embedding,
                        image_thumbnail_path, model_name)
                       VALUES ($1, $2, $3, $4, $5)""",
                    insert_args,
                )
    return len(insert_args)


async def search_document_images_similar(
    query_embedding: list[float],
    limit: int = 10,
    case_id: UUID | None = None,
    practice_area: str | None = None,
    appeal_subtype: str | None = None,
) -> list[dict]:
    """Cosine search over per-page image embeddings of case documents.

    Pages stored without an embedding are excluded, so every returned row
    has a numeric ``score`` (``1 - cosine distance``).

    Args:
        query_embedding: Query vector compared against stored page embeddings.
        limit: Maximum number of rows returned.
        case_id: Optional filter on the owning case.
        practice_area: Optional filter on the case's practice area.
        appeal_subtype: Optional filter on the case's appeal subtype.

    Returns:
        Row dicts ordered by ascending cosine distance (best match first).
    """
    pool = await get_pool()
    # $1 = query embedding, $2 = limit; optional filters bind from $3 on.
    # The store function permits NULL embeddings for skipped pages; their
    # distance is NULL, so without this filter they could be returned with a
    # NULL score whenever fewer than `limit` real matches exist.
    conditions: list[str] = ["die.embedding IS NOT NULL"]
    params: list = [query_embedding, limit]
    if case_id:
        params.append(case_id)
        conditions.append(f"die.case_id = ${len(params)}")
    if practice_area:
        params.append(practice_area)
        conditions.append(f"c.practice_area = ${len(params)}")
    if appeal_subtype:
        params.append(appeal_subtype)
        conditions.append(f"c.appeal_subtype = ${len(params)}")
    # NOTE(review): the inner JOIN on cases drops rows whose case_id is NULL
    # (the store function accepts case_id=None) — confirm that is intended.
    sql = f"""
        SELECT die.document_id, die.case_id, die.page_number,
               die.image_thumbnail_path,
               d.title AS document_title,
               c.case_number,
               1 - (die.embedding <=> $1) AS score
        FROM document_image_embeddings die
        JOIN documents d ON d.id = die.document_id
        JOIN cases c ON c.id = die.case_id
        WHERE {' AND '.join(conditions)}
        ORDER BY die.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        rows = await conn.fetch(sql, *params)
    return [dict(r) for r in rows]


async def search_precedent_images_similar(
    query_embedding: list[float],
    limit: int = 10,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
) -> list[dict]:
    """Rank precedent-ruling page images by cosine similarity to a query vector.

    Only ``case_law`` rows with ``source_kind = 'external_upload'`` are
    searched. Empty-string filters are ignored; ``court`` is matched as a
    case-insensitive substring; ``is_binding`` is applied whenever it is
    not None. ``decision_date`` is returned as an ISO-8601 string when
    present. Rows come back best match first, at most ``limit`` of them.
    """
    pool = await get_pool()

    # $1 (query vector) and $2 (row limit) are fixed; filters follow.
    args: list = [query_embedding, limit]
    predicates: list[str] = ["cl.source_kind = 'external_upload'"]

    def add_filter(lhs: str, value) -> None:
        # Bind the value first so len(args) is its placeholder number.
        args.append(value)
        predicates.append(f"{lhs} ${len(args)}")

    if practice_area:
        add_filter("cl.practice_area =", practice_area)
    if court:
        add_filter("cl.court ILIKE", f"%{court}%")
    if precedent_level:
        add_filter("cl.precedent_level =", precedent_level)
    if appeal_subtype:
        add_filter("cl.appeal_subtype =", appeal_subtype)
    if is_binding is not None:
        add_filter("cl.is_binding =", is_binding)

    where_clause = " AND ".join(predicates)
    sql = f"""
        SELECT pie.case_law_id, pie.page_number, pie.image_thumbnail_path,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.practice_area,
               1 - (pie.embedding <=> $1) AS score
        FROM precedent_image_embeddings pie
        JOIN case_law cl ON cl.id = pie.case_law_id
        WHERE {where_clause}
        ORDER BY pie.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)

    results: list[dict] = []
    for record in records:
        item = dict(record)
        decided_on = item.get("decision_date")
        if decided_on is not None:
            item["decision_date"] = decided_on.isoformat()
        results.append(item)
    return results


async def search_similar_hybrid(
    query_text_embedding: list[float],
    query_image_embedding: list[float],
    limit: int = 10,
    fetch_k: int = 30,
    text_weight: float = 0.65,
    case_id: UUID | None = None,
    section_type: str | None = None,
    practice_area: str | None = None,
    appeal_subtype: str | None = None,
) -> list[dict]:
    """Blend text-chunk search with per-page image search over case documents.

    Both searches fetch ``fetch_k`` candidates. A text chunk whose
    (document_id, page_number) also appears among the image hits gets
    ``score = text_score * text_weight + image_score * (1 - text_weight)``
    and ``match_type='text+image'``; other text chunks keep
    ``match_type='text'`` with an image score of 0. Image hits on pages
    with no matching text chunk are appended as ``match_type='image'``
    with empty ``content`` so dense scanned pages still surface.

    ``section_type`` is applied to the text search only; it is not passed
    to the image search.

    Returns the top ``limit`` merged rows, highest blended score first.
    """
    image_weight = 1.0 - text_weight

    text_hits = await search_similar(
        query_text_embedding,
        limit=fetch_k,
        case_id=case_id,
        section_type=section_type,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
    )
    image_hits = await search_document_images_similar(
        query_image_embedding,
        limit=fetch_k,
        case_id=case_id,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
    )

    # Index image hits by (document id, page) for the per-chunk lookup.
    image_by_page: dict[tuple, dict] = {}
    for hit in image_hits:
        image_by_page[(str(hit["document_id"]), hit["page_number"])] = hit

    covered_pages: set = set()
    combined: list[dict] = []

    for hit in text_hits:
        page_no = hit.get("page_number")
        # Chunks without a page number can never pair with an image hit.
        page_key = None if page_no is None else (str(hit["document_id"]), page_no)
        image_hit = image_by_page.get(page_key) if page_key else None

        text_score = float(hit["score"])
        image_score = float(image_hit["score"]) if image_hit else 0.0

        entry = dict(hit)
        entry.update(
            text_score=text_score,
            image_score=image_score,
            score=text_score * text_weight + image_score * image_weight,
            match_type="text+image" if image_hit else "text",
        )
        if image_hit:
            entry["image_thumbnail_path"] = image_hit.get("image_thumbnail_path")
        combined.append(entry)
        if page_key:
            covered_pages.add(page_key)

    for hit in image_hits:
        if (str(hit["document_id"]), hit["page_number"]) in covered_pages:
            continue
        raw_score = float(hit["score"])
        entry = dict(hit)
        entry.update(
            text_score=0.0,
            image_score=raw_score,
            score=raw_score * image_weight,
            match_type="image",
            content="",
            section_type="image",
        )
        combined.append(entry)

    # Stable descending sort: ties keep their insertion order.
    combined.sort(key=lambda item: item["score"], reverse=True)
    return combined[:limit]


async def search_precedent_library_hybrid(
    query_text_embedding: list[float],
    query_image_embedding: list[float],
    limit: int = 10,
    fetch_k: int = 30,
    text_weight: float = 0.65,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    include_halachot: bool = True,
) -> list[dict]:
    """Blend precedent-library text search with per-page image search.

    Text results on a (case_law_id, page_number) that also has an image
    hit are boosted by that page's image score. Results of type
    ``'halacha'`` without such a page match are boosted by the best image
    score of any page in the same case_law row. Image hits on pages with
    no matching text result are appended with ``type='image_page'`` and
    empty ``content``.

    ``subject_tag`` and ``include_halachot`` are applied to the text
    search only; they are not passed to the image search.

    Returns the top ``limit`` merged rows, highest blended score first.
    """
    image_weight = 1.0 - text_weight

    text_hits = await search_precedent_library_semantic(
        query_text_embedding,
        practice_area=practice_area,
        court=court,
        precedent_level=precedent_level,
        appeal_subtype=appeal_subtype,
        is_binding=is_binding,
        subject_tag=subject_tag,
        limit=fetch_k,
        include_halachot=include_halachot,
    )
    image_hits = await search_precedent_images_similar(
        query_image_embedding,
        limit=fetch_k,
        practice_area=practice_area,
        court=court,
        precedent_level=precedent_level,
        appeal_subtype=appeal_subtype,
        is_binding=is_binding,
    )

    # Two indexes over the image hits: exact page, and best score per ruling.
    image_by_page: dict[tuple, dict] = {}
    best_image_by_case: dict[str, float] = {}
    for hit in image_hits:
        ruling_id = str(hit["case_law_id"])
        image_by_page[(ruling_id, hit["page_number"])] = hit
        previous_best = best_image_by_case.get(ruling_id, 0.0)
        best_image_by_case[ruling_id] = max(previous_best, float(hit["score"]))

    covered_pages: set = set()
    combined: list[dict] = []

    for hit in text_hits:
        ruling_id = str(hit["case_law_id"])
        page_no = hit.get("page_number")
        page_key = None if page_no is None else (ruling_id, page_no)
        image_hit = image_by_page.get(page_key) if page_key else None

        if image_hit:
            image_score = float(image_hit["score"])
        elif hit.get("type") == "halacha":
            # No page match for a halacha: fall back to the ruling-wide best.
            image_score = best_image_by_case.get(ruling_id, 0.0)
        else:
            image_score = 0.0
        text_score = float(hit["score"])

        entry = dict(hit)
        entry.update(
            text_score=text_score,
            image_score=image_score,
            score=text_score * text_weight + image_score * image_weight,
        )
        if image_hit:
            entry["image_thumbnail_path"] = image_hit.get("image_thumbnail_path")
        if page_key:
            covered_pages.add(page_key)
        combined.append(entry)

    for hit in image_hits:
        if (str(hit["case_law_id"]), hit["page_number"]) in covered_pages:
            continue
        raw_score = float(hit["score"])
        entry = dict(hit)
        entry.update(
            text_score=0.0,
            image_score=raw_score,
            score=raw_score * image_weight,
            type="image_page",
            content="",
            section_type="image",
        )
        combined.append(entry)

    # Stable descending sort: ties keep their insertion order.
    combined.sort(key=lambda item: item["score"], reverse=True)
    return combined[:limit]


# ── Missing precedents (V13) ───────────────────────────────────────
# Track citations from party briefs that aren't yet in the corpus.
# Lifecycle: 'open' → researcher logs gap → chair uploads decision
# → status='uploaded' (file ingested) → status='closed' (linked to
# case_law row). 'irrelevant' = chair decided the citation isn't worth
# adding to the library.

# Values accepted for missing_precedents.cited_by_party.
ALLOWED_MP_PARTIES = {
    "appellant",
    "respondent",
    "committee",
    "permit_applicant",
    "unknown",
}

# Values accepted for missing_precedents.status.
ALLOWED_MP_STATUS = {
    "open",
    "uploaded",
    "closed",
    "irrelevant",
}


def _row_to_missing_precedent(row: asyncpg.Record) -> dict:
    """Convert a missing_precedents row to a plain dict with string ids.

    ``id`` is always stringified. The optional reference columns are
    stringified only when present and not None; every other column is
    copied through unchanged.
    """
    record = dict(row)
    record["id"] = str(record["id"])
    optional_id_columns = (
        "cited_in_case_id",
        "cited_in_document_id",
        "linked_case_law_id",
    )
    for column in optional_id_columns:
        value = record.get(column)
        if value is not None:
            record[column] = str(value)
    return record


async def create_missing_precedent(
    citation: str,
    case_name: str | None = None,
    cited_in_case_id: UUID | None = None,
    cited_in_document_id: UUID | None = None,
    cited_by_party: str | None = None,
    cited_by_party_name: str | None = None,
    legal_topic: str | None = None,
    legal_issue: str | None = None,
    claim_quote: str | None = None,
    notes: str | None = None,
) -> dict:
    """Insert a missing-precedent row and return it as a dict.

    The citation is stored with surrounding whitespace removed. Columns
    not supplied here (such as the status) take their table defaults.

    Raises:
        ValueError: if ``citation`` is blank, or ``cited_by_party`` is set
            to a value outside ALLOWED_MP_PARTIES.
    """
    trimmed_citation = citation.strip()
    if not trimmed_citation:
        raise ValueError("citation is required")
    if cited_by_party and cited_by_party not in ALLOWED_MP_PARTIES:
        raise ValueError(
            f"cited_by_party must be one of {sorted(ALLOWED_MP_PARTIES)}"
        )

    insert_sql = """INSERT INTO missing_precedents (
                citation, case_name, cited_in_case_id, cited_in_document_id,
                cited_by_party, cited_by_party_name, legal_topic, legal_issue,
                claim_quote, notes
            )
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
            RETURNING *"""
    # Positional values in the same order as the column list above.
    values = (
        trimmed_citation,
        case_name,
        cited_in_case_id,
        cited_in_document_id,
        cited_by_party,
        cited_by_party_name,
        legal_topic,
        legal_issue,
        claim_quote,
        notes,
    )

    pool = await get_pool()
    async with pool.acquire() as conn:
        inserted = await conn.fetchrow(insert_sql, *values)
    return _row_to_missing_precedent(inserted)


async def list_missing_precedents(
    status: str | None = None,
    case_id: UUID | None = None,
    legal_topic: str | None = None,
    limit: int = 200,
    offset: int = 0,
) -> list[dict]:
    """Return a page of missing-precedent rows with display columns joined in.

    Each row also carries the citing case's number and the linked
    case_law row's number and name (NULL when there is no link). Falsy
    filters are ignored; ``legal_topic`` is a case-insensitive substring
    match. Rows are ordered open, uploaded, closed, irrelevant, and
    newest first within each status.
    """
    pool = await get_pool()

    args: list = []
    filters: list[str] = []
    optional_filters = (
        ("mp.status =", status),
        ("mp.cited_in_case_id =", case_id),
        ("mp.legal_topic ILIKE", f"%{legal_topic}%" if legal_topic else None),
    )
    for lhs, value in optional_filters:
        if value:
            # The value's position in ``args`` is its placeholder number.
            args.append(value)
            filters.append(f"{lhs} ${len(args)}")

    where_clause = ("WHERE " + " AND ".join(filters)) if filters else ""

    # Paging parameters always take the last two placeholders.
    args.extend((limit, offset))
    offset_pos = len(args)
    limit_pos = offset_pos - 1

    sql = f"""
        SELECT mp.*,
               c.case_number AS cited_in_case_number,
               cl.case_number AS linked_case_law_number,
               cl.case_name AS linked_case_law_name
        FROM missing_precedents mp
        LEFT JOIN cases c ON c.id = mp.cited_in_case_id
        LEFT JOIN case_law cl ON cl.id = mp.linked_case_law_id
        {where_clause}
        ORDER BY
            CASE mp.status
                WHEN 'open' THEN 0
                WHEN 'uploaded' THEN 1
                WHEN 'closed' THEN 2
                WHEN 'irrelevant' THEN 3
            END,
            mp.created_at DESC
        LIMIT ${limit_pos} OFFSET ${offset_pos}
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return [_row_to_missing_precedent(record) for record in records]


async def get_missing_precedent(mp_id: UUID) -> dict | None:
    """Fetch one missing-precedent row by id, or None when no row matches.

    The result includes the citing case's number and the linked case_law
    row's number and name, taken from LEFT JOINs.
    """
    query = """
            SELECT mp.*,
                   c.case_number AS cited_in_case_number,
                   cl.case_number AS linked_case_law_number,
                   cl.case_name AS linked_case_law_name
            FROM missing_precedents mp
            LEFT JOIN cases c ON c.id = mp.cited_in_case_id
            LEFT JOIN case_law cl ON cl.id = mp.linked_case_law_id
            WHERE mp.id = $1
            """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, mp_id)
    if not record:
        return None
    return _row_to_missing_precedent(record)


async def update_missing_precedent(mp_id: UUID, **fields) -> dict | None:
    """Patch selected columns of a missing-precedent row.

    Only these columns are written: legal_topic, legal_issue, notes,
    cited_by_party, cited_by_party_name, case_name, status,
    linked_case_law_id, closed_at, claim_quote and citation. Any other
    keyword argument is silently dropped. ``updated_at`` is refreshed
    whenever at least one column is written.

    A new ``citation`` is trimmed and must be non-empty, matching what
    create_missing_precedent stores, so the exact-match lookup in
    find_missing_precedent_by_citation still finds the row after an edit.

    Returns:
        The updated row (``RETURNING *``, without the joined display
        columns), or None when no row has this id. When there is nothing
        to write, the result of get_missing_precedent(mp_id) instead.

    Raises:
        ValueError: if ``status`` is not in ALLOWED_MP_STATUS, a non-empty
            ``cited_by_party`` is not in ALLOWED_MP_PARTIES, or
            ``citation`` is empty after trimming.
    """
    allowed = {
        "legal_topic", "legal_issue", "notes", "cited_by_party",
        "cited_by_party_name", "case_name", "status", "linked_case_law_id",
        "closed_at", "claim_quote", "citation",
    }
    clean = {k: v for k, v in fields.items() if k in allowed}
    if not clean:
        # Covers both "no fields given" and "only unknown fields given".
        return await get_missing_precedent(mp_id)

    if "status" in clean and clean["status"] not in ALLOWED_MP_STATUS:
        raise ValueError(
            f"status must be one of {sorted(ALLOWED_MP_STATUS)}"
        )
    party = clean.get("cited_by_party")
    if party and party not in ALLOWED_MP_PARTIES:
        raise ValueError(
            f"cited_by_party must be one of {sorted(ALLOWED_MP_PARTIES)}"
        )
    if "citation" in clean:
        # Same normalisation and rule as on insert; a blank or padded
        # citation would break citation-based deduplication.
        citation = (clean["citation"] or "").strip()
        if not citation:
            raise ValueError("citation is required")
        clean["citation"] = citation

    # Column names come from the whitelist above, so interpolating them is
    # safe; values are always bound as parameters ($1 is the row id).
    set_clauses = [
        f"{column} = ${position}"
        for position, column in enumerate(clean, start=2)
    ]
    set_clauses.append("updated_at = now()")
    sql = (
        f"UPDATE missing_precedents SET {', '.join(set_clauses)} "
        f"WHERE id = $1 RETURNING *"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(sql, mp_id, *clean.values())
    return _row_to_missing_precedent(row) if row else None


async def close_missing_precedent(
    mp_id: UUID,
    linked_case_law_id: UUID | None = None,
    notes: str | None = None,
    status: str = "closed",
) -> dict | None:
    """Set a missing-precedent row's status and stamp it as closed.

    ``closed_at`` and ``updated_at`` are set to now() for whichever status
    is given. ``linked_case_law_id`` and ``notes`` are written only when
    they are not None. Returns the updated row, or None when no row has
    this id.

    Raises:
        ValueError: if ``status`` is not in ALLOWED_MP_STATUS.
    """
    if status not in ALLOWED_MP_STATUS:
        raise ValueError(
            f"status must be one of {sorted(ALLOWED_MP_STATUS)}"
        )
    pool = await get_pool()

    # $1 is the row id and $2 the status; optional columns follow.
    args: list = [mp_id, status]
    assignments = ["status = $2", "closed_at = now()", "updated_at = now()"]
    optional_columns = (
        ("linked_case_law_id", linked_case_law_id),
        ("notes", notes),
    )
    for column, value in optional_columns:
        if value is None:
            continue
        args.append(value)
        assignments.append(f"{column} = ${len(args)}")

    sql = (
        f"UPDATE missing_precedents SET {', '.join(assignments)} "
        f"WHERE id = $1 RETURNING *"
    )
    async with pool.acquire() as conn:
        updated = await conn.fetchrow(sql, *args)
    if not updated:
        return None
    return _row_to_missing_precedent(updated)


async def find_missing_precedent_by_citation(
    citation: str,
    case_id: UUID | None = None,
) -> dict | None:
    """Return one row whose citation equals the trimmed input, or None.

    When ``case_id`` is given the row must also have been cited in that
    case. Matching is exact string equality. The query has no ORDER BY,
    so which row is returned when several match is left to the database.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        needle = citation.strip()
        if case_id is None:
            sql = "SELECT * FROM missing_precedents WHERE citation = $1 LIMIT 1"
            args: tuple = (needle,)
        else:
            sql = (
                "SELECT * FROM missing_precedents "
                "WHERE citation = $1 AND cited_in_case_id = $2 LIMIT 1"
            )
            args = (needle, case_id)
        match = await conn.fetchrow(sql, *args)
    if not match:
        return None
    return _row_to_missing_precedent(match)