"""Database service - asyncpg connection pool and queries.""" from __future__ import annotations import json import logging from datetime import date from uuid import UUID, uuid4 import asyncpg from pgvector.asyncpg import register_vector from legal_mcp import config logger = logging.getLogger(__name__) _pool: asyncpg.Pool | None = None async def get_pool() -> asyncpg.Pool: global _pool if _pool is None: # First, ensure pgvector extension exists (before registering type codec) conn = await asyncpg.connect(config.POSTGRES_URL) await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"') await conn.close() _pool = await asyncpg.create_pool( config.POSTGRES_URL, min_size=2, max_size=10, init=_init_connection, ) return _pool async def _init_connection(conn: asyncpg.Connection) -> None: await register_vector(conn) async def close_pool() -> None: global _pool if _pool: await _pool.close() _pool = None # ── Schema ────────────────────────────────────────────────────────── SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS cases ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_number TEXT UNIQUE NOT NULL, title TEXT NOT NULL, appellants JSONB DEFAULT '[]', respondents JSONB DEFAULT '[]', subject TEXT DEFAULT '', property_address TEXT DEFAULT '', permit_number TEXT DEFAULT '', committee_type TEXT DEFAULT 'ועדה מקומית', status TEXT DEFAULT 'new', hearing_date DATE, decision_date DATE, tags JSONB DEFAULT '[]', notes TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now(), updated_at TIMESTAMPTZ DEFAULT now() ); CREATE TABLE IF NOT EXISTS documents ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_id UUID REFERENCES cases(id) ON DELETE CASCADE, doc_type TEXT NOT NULL, title TEXT NOT NULL, file_path TEXT NOT NULL, extracted_text TEXT DEFAULT '', extraction_status TEXT DEFAULT 'pending', page_count INTEGER, metadata JSONB DEFAULT '{}', created_at TIMESTAMPTZ DEFAULT now() ); CREATE TABLE IF NOT EXISTS document_chunks ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), document_id UUID REFERENCES documents(id) ON DELETE CASCADE, case_id UUID REFERENCES cases(id) ON DELETE CASCADE, chunk_index INTEGER NOT NULL, content TEXT NOT NULL, section_type TEXT DEFAULT 'other', embedding vector(1024), page_number INTEGER, created_at TIMESTAMPTZ DEFAULT now() ); CREATE TABLE IF NOT EXISTS style_corpus ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), document_id UUID REFERENCES documents(id) ON DELETE SET NULL, decision_number TEXT, decision_date DATE, subject_categories JSONB DEFAULT '[]', full_text TEXT NOT NULL, summary TEXT DEFAULT '', outcome TEXT DEFAULT '', key_principles JSONB DEFAULT '[]', practice_area TEXT DEFAULT 'appeals_committee', appeal_subtype TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now() ); CREATE TABLE IF NOT EXISTS style_patterns ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), pattern_type TEXT NOT NULL, pattern_text TEXT NOT NULL, frequency INTEGER DEFAULT 1, context TEXT DEFAULT '', examples JSONB DEFAULT '[]', appeal_subtype TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS idx_chunks_embedding ON document_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); CREATE INDEX IF NOT EXISTS idx_chunks_case ON document_chunks(case_id); CREATE INDEX IF NOT EXISTS idx_chunks_doc ON document_chunks(document_id); CREATE INDEX IF NOT EXISTS idx_docs_case ON documents(case_id); CREATE INDEX IF NOT EXISTS idx_cases_status ON cases(status); CREATE INDEX IF NOT EXISTS idx_cases_number ON cases(case_number); """ MIGRATIONS_SQL = """ ALTER TABLE cases ADD COLUMN IF NOT EXISTS expected_outcome TEXT DEFAULT ''; CREATE TABLE IF NOT EXISTS audit_log ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), action TEXT NOT NULL, case_id UUID REFERENCES cases(id) ON DELETE SET NULL, document_id UUID REFERENCES documents(id) ON DELETE SET NULL, details JSONB DEFAULT '{}', actor TEXT DEFAULT 'system', created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS idx_audit_case ON audit_log(case_id); CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_log(action); CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_log(created_at DESC); """ # ── Phase 3: Workflow expansion ──────────────────────────────────── SCHEMA_V3_SQL = """ -- הרחבת decisions עם שדות חדשים ALTER TABLE decisions ADD COLUMN IF NOT EXISTS direction_doc JSONB DEFAULT NULL; ALTER TABLE decisions ADD COLUMN IF NOT EXISTS outcome_reasoning TEXT DEFAULT ''; -- הרחבת cases עם appeal_type (אם לא קיים) ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_type TEXT DEFAULT ''; ALTER TABLE cases ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT 'appeals_committee'; ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT ''; -- active_draft_path = path to the DOCX that is the current source of truth -- for this case's decision text. Set to the latest טיוטה-v*.docx after export, -- or the latest עריכה-v*.docx after user upload. Used by revise_draft to know -- what file to base Track Changes revisions on. ALTER TABLE cases ADD COLUMN IF NOT EXISTS active_draft_path TEXT; -- הרחבת style_corpus עם practice_area / appeal_subtype ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT 'appeals_committee'; ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT ''; -- הרחבת style_patterns עם appeal_subtype לניתוח סגנון נפרד לכל סוג ערר ALTER TABLE style_patterns ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT ''; -- טבלת qa_results CREATE TABLE IF NOT EXISTS qa_results ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE, case_id UUID REFERENCES cases(id) ON DELETE CASCADE, check_name TEXT NOT NULL, passed BOOLEAN NOT NULL, severity TEXT DEFAULT 'warning', errors JSONB DEFAULT '[]', details TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS idx_qa_results_decision ON qa_results(decision_id); CREATE INDEX IF NOT EXISTS idx_qa_results_case ON qa_results(case_id); -- טבלת decision_definitions (אם לא קיימת) CREATE TABLE IF NOT EXISTS decision_definitions ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE, term TEXT NOT NULL, definition TEXT NOT NULL, block_id TEXT DEFAULT 'block-he', created_at TIMESTAMPTZ DEFAULT now() ); CREATE INDEX IF NOT EXISTS idx_definitions_decision ON decision_definitions(decision_id); -- טבלת appeal_type_rules (אם לא קיימת) CREATE TABLE IF NOT EXISTS appeal_type_rules ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), appeal_type TEXT NOT NULL, rule_category TEXT NOT NULL, rule_key TEXT NOT NULL, rule_value JSONB NOT NULL, description TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now(), UNIQUE(appeal_type, rule_category, rule_key) ); -- image_placeholders על decision_blocks ALTER TABLE decision_blocks ADD COLUMN IF NOT EXISTS image_placeholders JSONB DEFAULT '[]'; """ # ── Phase 2: Decision + Knowledge + RAG layers ──────────────────── SCHEMA_V2_SQL = """ -- ═══════════════════════════════════════════════════════════════════ -- Layer 2: Decision -- ═══════════════════════════════════════════════════════════════════ -- decisions: מטאדטה של החלטה (גרסה אחת = רשומה אחת) CREATE TABLE IF NOT EXISTS decisions ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_id UUID REFERENCES cases(id) ON DELETE CASCADE, version INTEGER DEFAULT 1, status TEXT DEFAULT 'draft', -- draft/review/final/published outcome TEXT DEFAULT '', -- rejected/accepted/partial outcome_summary TEXT DEFAULT '', -- תמצית תוצאה (שורה אחת) total_paragraphs INTEGER DEFAULT 0, total_words INTEGER DEFAULT 0, decision_date DATE, author TEXT DEFAULT 'דפנה תמיר', panel_members JSONB DEFAULT '[]', created_at TIMESTAMPTZ DEFAULT now(), updated_at TIMESTAMPTZ DEFAULT now(), UNIQUE(case_id, version) ); -- decision_blocks: 12 בלוקים לפי block-schema.md CREATE TABLE IF NOT EXISTS decision_blocks ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE, block_id TEXT NOT NULL, -- block-alef, block-bet, ... block-yod-bet block_index INTEGER NOT NULL, -- 1-12 title TEXT DEFAULT '', -- כותרת הבלוק (ריק לבלוקים ללא כותרת) content TEXT DEFAULT '', -- תוכן מלא (markdown) word_count INTEGER DEFAULT 0, weight_percent NUMERIC(5,2) DEFAULT 0, -- משקל בפועל (%) generation_type TEXT DEFAULT '', -- template-fill/reproduction/paraphrase/... model_used TEXT DEFAULT '', -- sonnet/opus/script temperature NUMERIC(3,2) DEFAULT 0, status TEXT DEFAULT 'empty', -- empty/draft/review/final notes TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now(), updated_at TIMESTAMPTZ DEFAULT now(), UNIQUE(decision_id, block_id) ); -- decision_paragraphs: סעיפים בודדים עם מעקב ציטוטים CREATE TABLE IF NOT EXISTS decision_paragraphs ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), block_id UUID REFERENCES decision_blocks(id) ON DELETE CASCADE, paragraph_number INTEGER NOT NULL, -- מספור רציף בתוך ההחלטה content TEXT NOT NULL, word_count INTEGER DEFAULT 0, citations JSONB DEFAULT '[]', -- [{case_law_id, text, type}] cross_references JSONB DEFAULT '[]', -- הפניות לסעיפים אחרים ["סעיף 5 לעיל"] created_at TIMESTAMPTZ DEFAULT now() ); -- claims: טענות צדדים (בלוק ז) CREATE TABLE IF NOT EXISTS claims ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_id UUID REFERENCES cases(id) ON DELETE CASCADE, party_role TEXT NOT NULL, -- appellant/respondent/permit_applicant/committee party_name TEXT DEFAULT '', claim_text TEXT NOT NULL, claim_index INTEGER DEFAULT 0, -- סדר הופעה source_document TEXT DEFAULT '', -- מאיזה מסמך חולצה הטענה addressed_in_paragraph INTEGER, -- באיזה סעיף בדיון נענתה created_at TIMESTAMPTZ DEFAULT now() ); -- ═══════════════════════════════════════════════════════════════════ -- Layer 3: Legal Knowledge -- ═══════════════════════════════════════════════════════════════════ -- case_law: פסיקה (תקדימים) CREATE TABLE IF NOT EXISTS case_law ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_number TEXT UNIQUE NOT NULL, -- עע"מ 3975/22 או ערר 1011-03-25 case_name TEXT NOT NULL, -- שם קצר: "ב. קרן-נכסים" court TEXT DEFAULT '', -- בג"ץ / עליון / מנהלי / ועדת ערר date DATE, subject_tags JSONB DEFAULT '[]', -- ["proprietary_claims", "parking"] summary TEXT DEFAULT '', -- תמצית 2-3 משפטים key_quote TEXT DEFAULT '', -- ציטוט מרכזי full_text TEXT DEFAULT '', -- טקסט מלא אם זמין source_url TEXT DEFAULT '', created_at TIMESTAMPTZ DEFAULT now() ); -- case_law_citations: קשרים בין פסיקה להחלטות שלנו CREATE TABLE IF NOT EXISTS case_law_citations ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE, decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE, paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE SET NULL, citation_type TEXT DEFAULT 'support', -- support/distinguish/overrule/obiter context_text TEXT DEFAULT '', -- ההקשר שבו צוטט created_at TIMESTAMPTZ DEFAULT now() ); -- statutory_provisions: חקיקה נפוצה CREATE TABLE IF NOT EXISTS statutory_provisions ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), statute_name TEXT NOT NULL, -- "חוק התכנון והבנייה" section_number TEXT NOT NULL, -- "152(א)(2)" section_title TEXT DEFAULT '', -- "זכות ערר" full_text TEXT DEFAULT '', -- נוסח הסעיף common_usage TEXT DEFAULT '', -- מתי משתמשים subject_tags JSONB DEFAULT '[]', created_at TIMESTAMPTZ DEFAULT now(), UNIQUE(statute_name, section_number) ); -- transition_phrases: ביטויי מעבר של דפנה CREATE TABLE IF NOT EXISTS transition_phrases ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), phrase TEXT UNIQUE NOT NULL, -- "ועל מנת לא לצאת בחסר" usage_context TEXT DEFAULT '', -- מתי להשתמש block_types JSONB DEFAULT '[]', -- באילו בלוקים: ["block-yod"] frequency INTEGER DEFAULT 1, -- כמה פעמים ראינו source_decision TEXT DEFAULT '', -- מאיזו החלטה created_at TIMESTAMPTZ DEFAULT now() ); -- lessons_learned: לקחים מהשוואת טיוטות לגרסאות סופיות CREATE TABLE IF NOT EXISTS lessons_learned ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), lesson_title TEXT NOT NULL, -- "Discussion = continuous essay, no sub-headers" lesson_text TEXT NOT NULL, -- תיאור מלא category TEXT DEFAULT '', -- structure/style/content/process applies_to JSONB DEFAULT '[]', -- ["block-yod", "all"] source_case TEXT DEFAULT '', -- "הכט 1180-1181" severity TEXT DEFAULT 'important', -- critical/important/nice-to-have created_at TIMESTAMPTZ DEFAULT now() ); -- ═══════════════════════════════════════════════════════════════════ -- Layer 4: Extended RAG -- ═══════════════════════════════════════════════════════════════════ -- paragraph_embeddings: embeddings של סעיפים בהחלטות CREATE TABLE IF NOT EXISTS paragraph_embeddings ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE CASCADE, embedding vector(1024), created_at TIMESTAMPTZ DEFAULT now() ); -- case_law_embeddings: embeddings של פסיקה CREATE TABLE IF NOT EXISTS case_law_embeddings ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE, chunk_text TEXT NOT NULL, embedding vector(1024), created_at TIMESTAMPTZ DEFAULT now() ); -- ═══════════════════════════════════════════════════════════════════ -- Chair Feedback (הערות דפנה על טיוטות) -- ═══════════════════════════════════════════════════════════════════ CREATE TABLE IF NOT EXISTS chair_feedback ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), case_id UUID REFERENCES cases(id) ON DELETE SET NULL, block_id TEXT DEFAULT '', -- block-yod, block-vav, etc. feedback_text TEXT NOT NULL, -- ההערה של דפנה category TEXT DEFAULT 'other', -- missing_content/wrong_tone/wrong_structure/factual_error/style/other lesson_extracted TEXT DEFAULT '', -- הלקח שהופק applied_to TEXT[] DEFAULT '{}', -- לאילו קבצים/כללים הלקח יושם resolved BOOLEAN DEFAULT FALSE, -- האם הלקח יושם created_at TIMESTAMPTZ DEFAULT now() ); CREATE TABLE IF NOT EXISTS tag_company_mappings ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), tag TEXT NOT NULL, -- appeal_subtype value (e.g. building_permit) tag_label TEXT NOT NULL DEFAULT '', -- Hebrew display label company_id TEXT NOT NULL, -- Paperclip company UUID company_name TEXT NOT NULL DEFAULT '', -- cached company name for display created_at TIMESTAMPTZ DEFAULT now(), UNIQUE(tag, company_id) ); -- ═══════════════════════════════════════════════════════════════════ -- Indexes -- ═══════════════════════════════════════════════════════════════════ CREATE INDEX IF NOT EXISTS idx_decisions_case ON decisions(case_id); CREATE INDEX IF NOT EXISTS idx_decisions_status ON decisions(status); CREATE INDEX IF NOT EXISTS idx_decision_blocks_decision ON decision_blocks(decision_id); CREATE INDEX IF NOT EXISTS idx_decision_blocks_block_id ON decision_blocks(block_id); CREATE INDEX IF NOT EXISTS idx_decision_paragraphs_block ON decision_paragraphs(block_id); CREATE INDEX IF NOT EXISTS idx_claims_case ON claims(case_id); CREATE INDEX IF NOT EXISTS idx_claims_role ON claims(party_role); CREATE INDEX IF NOT EXISTS idx_case_law_subject ON case_law USING gin(subject_tags); CREATE INDEX IF NOT EXISTS idx_case_law_citations_decision ON case_law_citations(decision_id); CREATE INDEX IF NOT EXISTS idx_statutory_provisions_statute ON statutory_provisions(statute_name); CREATE INDEX IF NOT EXISTS idx_transition_phrases_block ON transition_phrases USING gin(block_types); CREATE INDEX IF NOT EXISTS idx_lessons_category ON lessons_learned(category); CREATE INDEX IF NOT EXISTS idx_paragraph_embeddings_vec ON paragraph_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50); CREATE INDEX IF NOT EXISTS idx_case_law_embeddings_vec ON case_law_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50); """ # ── Phase 4: Methodology alignment ────────────────────────────── SCHEMA_V4_SQL = """ -- ═══════════════════════════════════════════════════════════════════ -- V4: Methodology alignment (decision-methodology.md) -- ═══════════════════════════════════════════════════════════════════ -- claims: טיפול בטענות (bundle/skip) + סוג טענה ALTER TABLE claims ADD COLUMN IF NOT EXISTS claim_type TEXT DEFAULT 'claim'; -- claim / response / reply ALTER TABLE claims ADD COLUMN IF NOT EXISTS claim_handling TEXT DEFAULT 'address'; -- address (דיון מלא) / bundle (קיבוץ) / skip (דילוג) ALTER TABLE claims ADD COLUMN IF NOT EXISTS bundle_group TEXT DEFAULT ''; -- שם הקבוצה לקיבוץ (למשל "פגמים פרוצדורליים") ALTER TABLE claims ADD COLUMN IF NOT EXISTS handling_reason TEXT DEFAULT ''; -- נימוק לדילוג/קיבוץ (למשל "נבחנה ולא מצאנו ממש") -- cases: תקן ביקורת + קטגוריות נושא ALTER TABLE cases ADD COLUMN IF NOT EXISTS standard_of_review TEXT DEFAULT ''; -- "שיקול דעת תכנוני עצמאי" / "בחינת שומה מכרעת" / ... ALTER TABLE cases ADD COLUMN IF NOT EXISTS subject_categories JSONB DEFAULT '[]'; -- ["חניה", "קווי בניין", "גובה", "שימוש חורג", ...] -- case_law: רמת תקדים + מעמד ALTER TABLE case_law ADD COLUMN IF NOT EXISTS precedent_level TEXT DEFAULT ''; -- עליון / מנהלי / ועדת ערר ארצית / ועדת ערר מחוזית ALTER TABLE case_law ADD COLUMN IF NOT EXISTS is_binding BOOLEAN DEFAULT TRUE; -- הלכה מחייבת (true) / אמרת אגב (false) ALTER TABLE case_law ADD COLUMN IF NOT EXISTS creac_role TEXT DEFAULT ''; -- rule (הנחה עליונה) / explanation (הרחבה) / analogy (אנלוגיה) -- decisions: סדר סוגיות + תקן ביקורת ALTER TABLE decisions ADD COLUMN IF NOT EXISTS issue_order JSONB DEFAULT '[]'; -- סדר הסוגיות שנקבע ע"י המנצח: [{"title": "...", "type": "threshold/dispositive/secondary"}] ALTER TABLE decisions ADD COLUMN IF NOT EXISTS claim_handling JSONB DEFAULT '{}'; -- {"overrides": [{"claim_id": "...", "handling": "bundle", "group": "..."}]} -- indexes CREATE INDEX IF NOT EXISTS idx_claims_handling ON claims(claim_handling); CREATE INDEX IF NOT EXISTS idx_claims_type ON claims(claim_type); CREATE INDEX IF NOT EXISTS idx_case_law_level ON case_law(precedent_level); """ async def init_schema() -> None: pool = await get_pool() async with pool.acquire() as conn: await conn.execute(SCHEMA_SQL) await conn.execute(MIGRATIONS_SQL) await conn.execute(SCHEMA_V2_SQL) await conn.execute(SCHEMA_V3_SQL) await conn.execute(SCHEMA_V4_SQL) logger.info("Database schema initialized (v1 + v2 + v3 + v4)") # ── Case CRUD ─────────────────────────────────────────────────────── async def create_case( case_number: str, title: str, appellants: list[str] | None = None, respondents: list[str] | None = None, subject: str = "", property_address: str = "", permit_number: str = "", committee_type: str = "ועדה מקומית", hearing_date: date | None = None, notes: str = "", expected_outcome: str = "", practice_area: str = "appeals_committee", appeal_subtype: str = "", ) -> dict: pool = await get_pool() case_id = uuid4() async with pool.acquire() as conn: await conn.execute( """INSERT INTO cases (id, case_number, title, appellants, respondents, subject, property_address, permit_number, committee_type, hearing_date, notes, expected_outcome, practice_area, appeal_subtype) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)""", case_id, case_number, title, json.dumps(appellants or []), json.dumps(respondents or []), subject, property_address, permit_number, committee_type, hearing_date, notes, expected_outcome, practice_area, appeal_subtype, ) return await get_case(case_id) async def get_case(case_id: UUID) -> dict | None: pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow("SELECT * FROM cases WHERE id = $1", case_id) if row is None: return None return _row_to_case(row) async def set_active_draft_path(case_id: UUID, path: str | None) -> None: """Update the case's active_draft_path (the DOCX that is source of truth).""" pool = await get_pool() async with pool.acquire() as conn: await conn.execute( "UPDATE cases SET active_draft_path = $1, updated_at = now() WHERE id = $2", path, case_id, ) async def get_active_draft_path(case_id: UUID) -> str | None: pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow( "SELECT active_draft_path FROM cases WHERE id = $1", case_id, ) return row["active_draft_path"] if row else None async def get_case_by_number(case_number: str) -> dict | None: pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow( "SELECT * FROM cases WHERE case_number = $1", case_number ) if row is None: return None return _row_to_case(row) async def list_cases(status: str | None = None, limit: int = 50) -> list[dict]: pool = await get_pool() async with pool.acquire() as conn: if status: rows = await conn.fetch( "SELECT * FROM cases WHERE status = $1 ORDER BY updated_at DESC LIMIT $2", status, limit, ) else: rows = await conn.fetch( "SELECT * FROM cases ORDER BY updated_at DESC LIMIT $1", limit ) return [_row_to_case(r) for r in rows] async def update_case(case_id: UUID, **fields) -> dict | None: if not fields: return await get_case(case_id) pool = await get_pool() set_clauses = [] values = [] for i, (key, val) in enumerate(fields.items(), start=2): if key in ("appellants", "respondents", "tags"): val = json.dumps(val) set_clauses.append(f"{key} = ${i}") values.append(val) set_clauses.append("updated_at = now()") sql = f"UPDATE cases SET {', '.join(set_clauses)} WHERE id = $1" async with pool.acquire() as conn: await conn.execute(sql, case_id, *values) return await get_case(case_id) def _row_to_case(row: asyncpg.Record) -> dict: d = dict(row) for field in ("appellants", "respondents", "tags"): if isinstance(d.get(field), str): d[field] = json.loads(d[field]) d["id"] = str(d["id"]) return d # ── Document CRUD ─────────────────────────────────────────────────── async def create_document( case_id: UUID, doc_type: str, title: str, file_path: str, page_count: int | None = None, ) -> dict: pool = await get_pool() doc_id = uuid4() async with pool.acquire() as conn: await conn.execute( """INSERT INTO documents (id, case_id, doc_type, title, file_path, page_count) VALUES ($1, $2, $3, $4, $5, $6)""", doc_id, case_id, doc_type, title, file_path, page_count, ) row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id) return _row_to_doc(row) async def update_document(doc_id: UUID, **fields) -> None: if not fields: return pool = await get_pool() set_clauses = [] values = [] for i, (key, val) in enumerate(fields.items(), start=2): if key == "metadata": val = json.dumps(val) set_clauses.append(f"{key} = ${i}") values.append(val) sql = f"UPDATE documents SET {', '.join(set_clauses)} WHERE id = $1" async with pool.acquire() as conn: await conn.execute(sql, doc_id, *values) async def get_document(doc_id: UUID) -> dict | None: pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id) return _row_to_doc(row) if row else None async def list_documents(case_id: UUID) -> list[dict]: pool = await get_pool() async with pool.acquire() as conn: rows = await conn.fetch( "SELECT * FROM documents WHERE case_id = $1 ORDER BY created_at", case_id ) return [_row_to_doc(r) for r in rows] async def get_document_text(doc_id: UUID) -> str: pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow( "SELECT extracted_text FROM documents WHERE id = $1", doc_id ) return row["extracted_text"] if row else "" def _row_to_doc(row: asyncpg.Record) -> dict: d = dict(row) d["id"] = str(d["id"]) d["case_id"] = str(d["case_id"]) if isinstance(d.get("metadata"), str): d["metadata"] = json.loads(d["metadata"]) return d # ── Claims ───────────────────────────────────────────────────────── async def store_claims(case_id: UUID, claims: list[dict], source_document: str = "") -> int: """Store extracted claims. Replaces existing claims from same source. Each claim dict: party_role, claim_text, claim_index, party_name (optional) """ pool = await get_pool() async with pool.acquire() as conn: if source_document: await conn.execute( "DELETE FROM claims WHERE case_id = $1 AND source_document = $2", case_id, source_document, ) for claim in claims: await conn.execute( """INSERT INTO claims (case_id, party_role, party_name, claim_text, claim_index, source_document, claim_type) VALUES ($1, $2, $3, $4, $5, $6, $7)""", case_id, claim["party_role"], claim.get("party_name", ""), claim["claim_text"], claim.get("claim_index", 0), source_document, claim.get("claim_type", "claim"), ) return len(claims) async def get_claims(case_id: UUID, party_role: str | None = None) -> list[dict]: """Get claims for a case, optionally filtered by party role.""" pool = await get_pool() async with pool.acquire() as conn: if party_role: rows = await conn.fetch( "SELECT * FROM claims WHERE case_id = $1 AND party_role = $2 ORDER BY claim_index", case_id, party_role, ) else: rows = await conn.fetch( "SELECT * FROM claims WHERE case_id = $1 ORDER BY party_role, claim_index", case_id, ) return [dict(r) for r in rows] # ── Decisions ────────────────────────────────────────────────────── async def create_decision( case_id: UUID, outcome: str = "", outcome_summary: str = "", outcome_reasoning: str = "", direction_doc: dict | None = None, ) -> dict: """Create a decision record for a case.""" pool = await get_pool() decision_id = uuid4() async with pool.acquire() as conn: # Check if a decision already exists for this case existing = await conn.fetchrow( "SELECT id, version FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1", case_id, ) version = (existing["version"] + 1) if existing else 1 await conn.execute( """INSERT INTO decisions (id, case_id, version, outcome, outcome_summary, outcome_reasoning, direction_doc) VALUES ($1, $2, $3, $4, $5, $6, $7)""", decision_id, case_id, version, outcome, outcome_summary, outcome_reasoning, json.dumps(direction_doc) if direction_doc else None, ) return await get_decision(decision_id) async def get_decision(decision_id: UUID) -> dict | None: pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow("SELECT * FROM decisions WHERE id = $1", decision_id) if not row: return None d = dict(row) d["id"] = str(d["id"]) d["case_id"] = str(d["case_id"]) if isinstance(d.get("direction_doc"), str): d["direction_doc"] = json.loads(d["direction_doc"]) if isinstance(d.get("panel_members"), str): d["panel_members"] = json.loads(d["panel_members"]) return d async def get_decision_by_case(case_id: UUID) -> dict | None: """Get the latest decision for a case.""" pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow( "SELECT * FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1", case_id, ) if not row: return None d = dict(row) d["id"] = str(d["id"]) d["case_id"] = str(d["case_id"]) if isinstance(d.get("direction_doc"), str): d["direction_doc"] = json.loads(d["direction_doc"]) if isinstance(d.get("panel_members"), str): d["panel_members"] = json.loads(d["panel_members"]) return d async def update_decision(decision_id: UUID, **fields) -> None: if not fields: return pool = await get_pool() set_clauses = [] values = [] for i, (key, val) in enumerate(fields.items(), start=2): if key in ("direction_doc", "panel_members") and isinstance(val, (dict, list)): val = json.dumps(val) set_clauses.append(f"{key} = ${i}") values.append(val) set_clauses.append("updated_at = now()") sql = f"UPDATE decisions SET {', '.join(set_clauses)} WHERE id = $1" async with pool.acquire() as conn: await conn.execute(sql, decision_id, *values) # ── Document deletion ────────────────────────────────────────────── async def delete_document(doc_id: UUID) -> bool: """Delete a document and all its chunks. Returns True if deleted.""" pool = await get_pool() async with pool.acquire() as conn: async with conn.transaction(): await conn.execute( "DELETE FROM document_chunks WHERE document_id = $1", doc_id ) result = await conn.execute( "DELETE FROM documents WHERE id = $1", doc_id ) return int(result.split()[-1]) > 0 # ── Chunks & Vectors ─────────────────────────────────────────────── async def delete_document_chunks(document_id: UUID) -> int: """Delete all chunks for a document (used before reprocessing).""" pool = await get_pool() async with pool.acquire() as conn: result = await conn.execute( "DELETE FROM document_chunks WHERE document_id = $1", document_id ) return int(result.split()[-1]) # e.g. "DELETE 5" -> 5 async def store_chunks( document_id: UUID, case_id: UUID | None, chunks: list[dict], ) -> int: """Store document chunks with embeddings. Each chunk dict has: content, section_type, embedding (list[float]), page_number, chunk_index """ pool = await get_pool() async with pool.acquire() as conn: # Delete existing chunks for this document await conn.execute( "DELETE FROM document_chunks WHERE document_id = $1", document_id ) for chunk in chunks: await conn.execute( """INSERT INTO document_chunks (document_id, case_id, chunk_index, content, section_type, embedding, page_number) VALUES ($1, $2, $3, $4, $5, $6, $7)""", document_id, case_id, chunk["chunk_index"], chunk["content"], chunk.get("section_type", "other"), chunk["embedding"], chunk.get("page_number"), ) return len(chunks) async def search_similar( query_embedding: list[float], limit: int = 10, case_id: UUID | None = None, section_type: str | None = None, practice_area: str | None = None, appeal_subtype: str | None = None, ) -> list[dict]: """Cosine similarity search on document chunks.""" pool = await get_pool() conditions = [] params: list = [query_embedding, limit] param_idx = 3 if case_id: conditions.append(f"dc.case_id = ${param_idx}") params.append(case_id) param_idx += 1 if section_type: conditions.append(f"dc.section_type = ${param_idx}") params.append(section_type) param_idx += 1 if practice_area: conditions.append(f"c.practice_area = ${param_idx}") params.append(practice_area) param_idx += 1 if appeal_subtype: conditions.append(f"c.appeal_subtype = ${param_idx}") params.append(appeal_subtype) param_idx += 1 where = f"WHERE {' AND '.join(conditions)}" if conditions else "" sql = f""" SELECT dc.content, dc.section_type, dc.page_number, dc.document_id, dc.case_id, d.title AS document_title, c.case_number, 1 - (dc.embedding <=> $1) AS score FROM document_chunks dc JOIN documents d ON d.id = dc.document_id JOIN cases c ON c.id = dc.case_id {where} ORDER BY dc.embedding <=> $1 LIMIT $2 """ async with pool.acquire() as conn: rows = await conn.fetch(sql, *params) return [dict(r) for r in rows] # ── Style corpus ──────────────────────────────────────────────────── async def add_to_style_corpus( document_id: UUID | None, decision_number: str, decision_date: date | None, subject_categories: list[str], full_text: str, summary: str = "", outcome: str = "", key_principles: list[str] | None = None, practice_area: str = "appeals_committee", appeal_subtype: str = "", ) -> UUID: pool = await get_pool() corpus_id = uuid4() async with pool.acquire() as conn: await conn.execute( """INSERT INTO style_corpus (id, document_id, decision_number, decision_date, subject_categories, full_text, summary, outcome, key_principles, practice_area, appeal_subtype) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""", corpus_id, document_id, decision_number, decision_date, json.dumps(subject_categories), full_text, summary, outcome, json.dumps(key_principles or []), practice_area, appeal_subtype, ) return corpus_id async def delete_from_style_corpus(corpus_id: UUID) -> dict: """Remove a decision from style_corpus + related documents (cascades chunks). Also tries to delete the [קורפוס] document associated by title match, since the current training pipeline inserts style_corpus with document_id=NULL. """ pool = await get_pool() async with pool.acquire() as conn: async with conn.transaction(): row = await conn.fetchrow( "DELETE FROM style_corpus WHERE id = $1 " "RETURNING decision_number, document_id", corpus_id, ) if not row: return {"deleted": False, "reason": "not found"} docs_deleted = 0 if row["document_id"]: await conn.execute( "DELETE FROM documents WHERE id = $1", row["document_id"] ) docs_deleted = 1 else: # Best-effort: match a [קורפוס] document by the decision_number # in its title. Only for single, unambiguous matches. if row["decision_number"]: docs = await conn.fetch( "SELECT id FROM documents " "WHERE case_id IS NULL AND title LIKE $1", f"%{row['decision_number']}%", ) if len(docs) == 1: await conn.execute( "DELETE FROM documents WHERE id = $1", docs[0]["id"] ) docs_deleted = 1 return { "deleted": True, "decision_number": row["decision_number"], "docs_deleted": docs_deleted, } async def get_style_patterns(pattern_type: str | None = None) -> list[dict]: pool = await get_pool() async with pool.acquire() as conn: if pattern_type: rows = await conn.fetch( "SELECT * FROM style_patterns WHERE pattern_type = $1 ORDER BY frequency DESC", pattern_type, ) else: rows = await conn.fetch( "SELECT * FROM style_patterns ORDER BY pattern_type, frequency DESC" ) return [dict(r) for r in rows] async def upsert_style_pattern( pattern_type: str, pattern_text: str, context: str = "", examples: list[str] | None = None, appeal_subtype: str = "", ) -> None: pool = await get_pool() async with pool.acquire() as conn: existing = await conn.fetchrow( "SELECT id, frequency FROM style_patterns " "WHERE pattern_type = $1 AND pattern_text = $2 AND appeal_subtype = $3", pattern_type, pattern_text, appeal_subtype, ) if existing: await conn.execute( "UPDATE style_patterns SET frequency = frequency + 1 WHERE id = $1", existing["id"], ) else: await conn.execute( """INSERT INTO style_patterns (pattern_type, pattern_text, context, examples, appeal_subtype) VALUES ($1, $2, $3, $4, $5)""", pattern_type, pattern_text, context, json.dumps(examples or []), appeal_subtype, ) async def clear_style_patterns(appeal_subtype: str = "") -> None: """Delete style patterns, optionally filtered by appeal_subtype. Empty appeal_subtype = delete ALL patterns. """ pool = await get_pool() async with pool.acquire() as conn: if appeal_subtype: await conn.execute( "DELETE FROM style_patterns WHERE appeal_subtype = $1", appeal_subtype ) else: await conn.execute("DELETE FROM style_patterns") # ── Semantic Search (V2 — decision blocks & case law) ───────────── async def search_similar_paragraphs( query_embedding: list[float], limit: int = 10, block_type: str | None = None, ) -> list[dict]: """Search decision paragraphs by semantic similarity.""" pool = await get_pool() conditions = [] params: list = [query_embedding, limit] param_idx = 3 if block_type: conditions.append(f"db.block_id = ${param_idx}") params.append(block_type) param_idx += 1 where = f"WHERE {' AND '.join(conditions)}" if conditions else "" sql = f""" SELECT dp.content, dp.word_count, dp.paragraph_number, db.block_id AS block_type, db.title AS block_title, c.case_number, c.title AS case_title, d.outcome, d.author, 1 - (pe.embedding <=> $1) AS score FROM paragraph_embeddings pe JOIN decision_paragraphs dp ON dp.id = pe.paragraph_id JOIN decision_blocks db ON db.id = dp.block_id JOIN decisions d ON d.id = db.decision_id JOIN cases c ON c.id = d.case_id {where} ORDER BY pe.embedding <=> $1 LIMIT $2 """ async with pool.acquire() as conn: rows = await conn.fetch(sql, *params) return [dict(r) for r in rows] async def search_similar_case_law( query_embedding: list[float], limit: int = 5, ) -> list[dict]: """Search case law by semantic similarity.""" pool = await get_pool() sql = """ SELECT cl.case_number, cl.case_name, cl.court, cl.summary, cl.key_quote, cl.subject_tags, cle.chunk_text, 1 - (cle.embedding <=> $1) AS score FROM case_law_embeddings cle JOIN case_law cl ON cl.id = cle.case_law_id ORDER BY cle.embedding <=> $1 LIMIT $2 """ async with pool.acquire() as conn: rows = await conn.fetch(sql, query_embedding, limit) results = [] for r in rows: d = dict(r) if isinstance(d.get("subject_tags"), str): d["subject_tags"] = json.loads(d["subject_tags"]) results.append(d) return results async def search_precedents( query_embedding: list[float], limit: int = 10, ) -> list[dict]: """Combined search: paragraphs + case law, ranked by score.""" paragraphs = await search_similar_paragraphs(query_embedding, limit=limit) case_law = await search_similar_case_law(query_embedding, limit=limit) # Combine and sort by score results = [] for p in paragraphs: results.append({ "type": "decision_paragraph", "score": float(p["score"]), "case_number": p["case_number"], "case_title": p["case_title"], "block_type": p["block_type"], "content": p["content"][:500], "author": p["author"], }) for c in case_law: results.append({ "type": "case_law", "score": float(c["score"]), "case_number": c["case_number"], "case_name": c["case_name"], "court": c["court"], "content": c["summary"], }) results.sort(key=lambda x: x["score"], reverse=True) return results[:limit] # ── Case precedents (CRUD) ──────────────────────────────────────── async def create_case_precedent( case_id: UUID, quote: str, citation: str, section_id: str | None = None, chair_note: str = "", pdf_document_id: UUID | None = None, practice_area: str | None = None, ) -> dict: """Insert a new precedent attached to a case.""" pool = await get_pool() row = await pool.fetchrow( """ INSERT INTO case_precedents (case_id, section_id, quote, citation, chair_note, pdf_document_id, practice_area) VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING * """, case_id, section_id, quote, citation, chair_note, pdf_document_id, practice_area, ) return dict(row) async def list_case_precedents(case_id: UUID) -> list[dict]: """List all precedents attached to a case, ordered by section then creation time.""" pool = await get_pool() rows = await pool.fetch( """ SELECT id, case_id, section_id, quote, citation, chair_note, pdf_document_id, practice_area, created_at, updated_at FROM case_precedents WHERE case_id = $1 ORDER BY section_id NULLS LAST, created_at """, case_id, ) return [dict(r) for r in rows] async def delete_case_precedent(precedent_id: UUID) -> bool: """Delete a precedent attachment by ID. Returns True if deleted.""" pool = await get_pool() result = await pool.execute( "DELETE FROM case_precedents WHERE id = $1", precedent_id ) return result == "DELETE 1" async def search_precedent_library( query: str, practice_area: str = "", limit: int = 10, ) -> list[dict]: """Search all precedents across cases by citation or quote text.""" pool = await get_pool() pattern = f"%{query}%" if practice_area: rows = await pool.fetch( """ SELECT id, case_id, section_id, quote, citation, chair_note, practice_area, created_at FROM case_precedents WHERE (citation ILIKE $1 OR quote ILIKE $1) AND practice_area = $2 ORDER BY created_at DESC LIMIT $3 """, pattern, practice_area, limit, ) else: rows = await pool.fetch( """ SELECT id, case_id, section_id, quote, citation, chair_note, practice_area, created_at FROM case_precedents WHERE citation ILIKE $1 OR quote ILIKE $1 ORDER BY created_at DESC LIMIT $2 """, pattern, limit, ) return [dict(r) for r in rows] # ── Chair feedback ──────────────────────────────────────────────── async def record_chair_feedback( case_id: UUID | None, block_id: str, feedback_text: str, category: str = "other", lesson_extracted: str = "", ) -> UUID: """Record feedback from the chair (Dafna) on a draft block.""" pool = await get_pool() feedback_id = uuid4() async with pool.acquire() as conn: await conn.execute( """INSERT INTO chair_feedback (id, case_id, block_id, feedback_text, category, lesson_extracted) VALUES ($1, $2, $3, $4, $5, $6)""", feedback_id, case_id, block_id, feedback_text, category, lesson_extracted, ) return feedback_id async def list_chair_feedback( case_id: UUID | None = None, category: str | None = None, unresolved_only: bool = False, ) -> list[dict]: """List chair feedback, optionally filtered.""" pool = await get_pool() conditions = [] params: list = [] idx = 1 if case_id: conditions.append(f"case_id = ${idx}") params.append(case_id) idx += 1 if category: conditions.append(f"category = ${idx}") params.append(category) idx += 1 if unresolved_only: conditions.append("resolved = FALSE") where = f"WHERE {' AND '.join(conditions)}" if conditions else "" async with pool.acquire() as conn: rows = await conn.fetch( f"SELECT * FROM chair_feedback {where} ORDER BY created_at DESC", *params, ) return [dict(r) for r in rows] async def resolve_chair_feedback( feedback_id: UUID, applied_to: list[str], ) -> None: """Mark feedback as resolved and record where it was applied.""" pool = await get_pool() async with pool.acquire() as conn: await conn.execute( """UPDATE chair_feedback SET resolved = TRUE, applied_to = $2 WHERE id = $1""", feedback_id, applied_to, )