Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export

New services (11 files):
- classifier.py: auto doc-type classification + party identification (Claude Haiku)
- claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex)
- references_extractor.py: plan/case-law/legislation detection (regex)
- brainstorm.py: direction generation with 2-3 options (Claude Sonnet)
- block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus)
- docx_exporter.py: DOCX export with David font, RTL, headings
- qa_validator.py: 6 QA checks with export blocking on critical failure
- learning_loop.py: draft vs final comparison + lesson extraction
- metrics.py: KPI dashboard, per case and global
- audit.py: action audit log
- cli.py: standalone CLI with 11 commands

Updated pipeline: extract → classify → chunk → embed → store → extract_references
New MCP tools: 29 total (was 16)
New DB: audit_log table, plus CRUD functions for decisions and claims
Config: Infisical support, external service allowlist

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-03 10:21:47 +00:00
parent df7cc4f5a5
commit d9e5ef0f46
21 changed files with 3957 additions and 14 deletions

View File

@@ -131,6 +131,253 @@ CREATE INDEX IF NOT EXISTS idx_cases_number ON cases(case_number);
# Incremental migrations; init_schema() executes this right after SCHEMA_SQL.
# Every statement is guarded with IF NOT EXISTS, so re-running it is harmless.
# It adds cases.expected_outcome and creates the audit_log table (one row per
# recorded action) together with indexes on case_id, action and created_at.
# audit_log rows survive deletion of the referenced case/document: the foreign
# keys are ON DELETE SET NULL.
# NOTE(review): uuid_generate_v4() presumably relies on the uuid-ossp extension
# being created in SCHEMA_SQL (not visible here) — confirm.
MIGRATIONS_SQL = """
ALTER TABLE cases ADD COLUMN IF NOT EXISTS expected_outcome TEXT DEFAULT '';
CREATE TABLE IF NOT EXISTS audit_log (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
action TEXT NOT NULL,
case_id UUID REFERENCES cases(id) ON DELETE SET NULL,
document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
details JSONB DEFAULT '{}',
actor TEXT DEFAULT 'system',
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_audit_case ON audit_log(case_id);
CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_log(action);
CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_log(created_at DESC);
"""
# ── Phase 3: Workflow expansion ────────────────────────────────────
# Schema v3. Although defined before SCHEMA_V2_SQL in this file, init_schema()
# executes it AFTER v2, which matters because it alters/references tables that
# v2 creates (decisions, decision_blocks).
# All statements are idempotent (IF NOT EXISTS).
# English gloss of the SQL comments inside the string, in order:
#   - extend decisions with new fields (direction_doc, outcome_reasoning)
#   - extend cases with appeal_type (if it does not exist yet)
#   - qa_results table: one row per QA check result for a decision/case
#   - decision_definitions table (if it does not exist yet)
#   - appeal_type_rules table (if it does not exist yet), unique per
#     (appeal_type, rule_category, rule_key)
#   - image_placeholders column on decision_blocks
SCHEMA_V3_SQL = """
-- הרחבת decisions עם שדות חדשים
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS direction_doc JSONB DEFAULT NULL;
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS outcome_reasoning TEXT DEFAULT '';
-- הרחבת cases עם appeal_type (אם לא קיים)
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_type TEXT DEFAULT '';
-- טבלת qa_results
CREATE TABLE IF NOT EXISTS qa_results (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
check_name TEXT NOT NULL,
passed BOOLEAN NOT NULL,
severity TEXT DEFAULT 'warning',
errors JSONB DEFAULT '[]',
details TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_qa_results_decision ON qa_results(decision_id);
CREATE INDEX IF NOT EXISTS idx_qa_results_case ON qa_results(case_id);
-- טבלת decision_definitions (אם לא קיימת)
CREATE TABLE IF NOT EXISTS decision_definitions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
term TEXT NOT NULL,
definition TEXT NOT NULL,
block_id TEXT DEFAULT 'block-he',
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_definitions_decision ON decision_definitions(decision_id);
-- טבלת appeal_type_rules (אם לא קיימת)
CREATE TABLE IF NOT EXISTS appeal_type_rules (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
appeal_type TEXT NOT NULL,
rule_category TEXT NOT NULL,
rule_key TEXT NOT NULL,
rule_value JSONB NOT NULL,
description TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT now(),
UNIQUE(appeal_type, rule_category, rule_key)
);
-- image_placeholders על decision_blocks
ALTER TABLE decision_blocks ADD COLUMN IF NOT EXISTS image_placeholders JSONB DEFAULT '[]';
"""
# ── Phase 2: Decision + Knowledge + RAG layers ────────────────────
# Schema v2; init_schema() executes it after MIGRATIONS_SQL and before
# SCHEMA_V3_SQL. All statements are idempotent (IF NOT EXISTS).
# Contents, following the section banners inside the string:
#   Layer 2 (Decision): decisions (one row per case/version), decision_blocks
#     (unique per decision/block_id), decision_paragraphs, claims.
#   Layer 3 (Legal Knowledge): case_law, case_law_citations,
#     statutory_provisions, transition_phrases, lessons_learned.
#   Layer 4 (Extended RAG): paragraph_embeddings and case_law_embeddings,
#     both storing vector(1024) embeddings.
#   Indexes: b-tree lookups, GIN indexes on the JSONB tag columns, and ivfflat
#     cosine indexes (lists = 50) on the two embedding tables.
# Note that decision_paragraphs.block_id is a UUID foreign key to
# decision_blocks.id, whereas decision_blocks.block_id is the TEXT block name.
# NOTE(review): the vector type and ivfflat access method presumably come from
# the pgvector extension, and uuid_generate_v4() from uuid-ossp, both expected
# to be enabled by SCHEMA_SQL (not visible here) — confirm.
SCHEMA_V2_SQL = """
-- ═══════════════════════════════════════════════════════════════════
-- Layer 2: Decision
-- ═══════════════════════════════════════════════════════════════════
-- decisions: מטאדטה של החלטה (גרסה אחת = רשומה אחת)
CREATE TABLE IF NOT EXISTS decisions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
version INTEGER DEFAULT 1,
status TEXT DEFAULT 'draft', -- draft/review/final/published
outcome TEXT DEFAULT '', -- rejected/accepted/partial
outcome_summary TEXT DEFAULT '', -- תמצית תוצאה (שורה אחת)
total_paragraphs INTEGER DEFAULT 0,
total_words INTEGER DEFAULT 0,
decision_date DATE,
author TEXT DEFAULT 'דפנה תמיר',
panel_members JSONB DEFAULT '[]',
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE(case_id, version)
);
-- decision_blocks: 12 בלוקים לפי block-schema.md
CREATE TABLE IF NOT EXISTS decision_blocks (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
block_id TEXT NOT NULL, -- block-alef, block-bet, ... block-yod-bet
block_index INTEGER NOT NULL, -- 1-12
title TEXT DEFAULT '', -- כותרת הבלוק (ריק לבלוקים ללא כותרת)
content TEXT DEFAULT '', -- תוכן מלא (markdown)
word_count INTEGER DEFAULT 0,
weight_percent NUMERIC(5,2) DEFAULT 0, -- משקל בפועל (%)
generation_type TEXT DEFAULT '', -- template-fill/reproduction/paraphrase/...
model_used TEXT DEFAULT '', -- sonnet/opus/script
temperature NUMERIC(3,2) DEFAULT 0,
status TEXT DEFAULT 'empty', -- empty/draft/review/final
notes TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE(decision_id, block_id)
);
-- decision_paragraphs: סעיפים בודדים עם מעקב ציטוטים
CREATE TABLE IF NOT EXISTS decision_paragraphs (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
block_id UUID REFERENCES decision_blocks(id) ON DELETE CASCADE,
paragraph_number INTEGER NOT NULL, -- מספור רציף בתוך ההחלטה
content TEXT NOT NULL,
word_count INTEGER DEFAULT 0,
citations JSONB DEFAULT '[]', -- [{case_law_id, text, type}]
cross_references JSONB DEFAULT '[]', -- הפניות לסעיפים אחרים ["סעיף 5 לעיל"]
created_at TIMESTAMPTZ DEFAULT now()
);
-- claims: טענות צדדים (בלוק ז)
CREATE TABLE IF NOT EXISTS claims (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
party_role TEXT NOT NULL, -- appellant/respondent/permit_applicant/committee
party_name TEXT DEFAULT '',
claim_text TEXT NOT NULL,
claim_index INTEGER DEFAULT 0, -- סדר הופעה
source_document TEXT DEFAULT '', -- מאיזה מסמך חולצה הטענה
addressed_in_paragraph INTEGER, -- באיזה סעיף בדיון נענתה
created_at TIMESTAMPTZ DEFAULT now()
);
-- ═══════════════════════════════════════════════════════════════════
-- Layer 3: Legal Knowledge
-- ═══════════════════════════════════════════════════════════════════
-- case_law: פסיקה (תקדימים)
CREATE TABLE IF NOT EXISTS case_law (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_number TEXT UNIQUE NOT NULL, -- עע"מ 3975/22 או ערר 1011-03-25
case_name TEXT NOT NULL, -- שם קצר: "ב. קרן-נכסים"
court TEXT DEFAULT '', -- בג"ץ / עליון / מנהלי / ועדת ערר
date DATE,
subject_tags JSONB DEFAULT '[]', -- ["proprietary_claims", "parking"]
summary TEXT DEFAULT '', -- תמצית 2-3 משפטים
key_quote TEXT DEFAULT '', -- ציטוט מרכזי
full_text TEXT DEFAULT '', -- טקסט מלא אם זמין
source_url TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT now()
);
-- case_law_citations: קשרים בין פסיקה להחלטות שלנו
CREATE TABLE IF NOT EXISTS case_law_citations (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE SET NULL,
citation_type TEXT DEFAULT 'support', -- support/distinguish/overrule/obiter
context_text TEXT DEFAULT '', -- ההקשר שבו צוטט
created_at TIMESTAMPTZ DEFAULT now()
);
-- statutory_provisions: חקיקה נפוצה
CREATE TABLE IF NOT EXISTS statutory_provisions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
statute_name TEXT NOT NULL, -- "חוק התכנון והבנייה"
section_number TEXT NOT NULL, -- "152(א)(2)"
section_title TEXT DEFAULT '', -- "זכות ערר"
full_text TEXT DEFAULT '', -- נוסח הסעיף
common_usage TEXT DEFAULT '', -- מתי משתמשים
subject_tags JSONB DEFAULT '[]',
created_at TIMESTAMPTZ DEFAULT now(),
UNIQUE(statute_name, section_number)
);
-- transition_phrases: ביטויי מעבר של דפנה
CREATE TABLE IF NOT EXISTS transition_phrases (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
phrase TEXT UNIQUE NOT NULL, -- "ועל מנת לא לצאת בחסר"
usage_context TEXT DEFAULT '', -- מתי להשתמש
block_types JSONB DEFAULT '[]', -- באילו בלוקים: ["block-yod"]
frequency INTEGER DEFAULT 1, -- כמה פעמים ראינו
source_decision TEXT DEFAULT '', -- מאיזו החלטה
created_at TIMESTAMPTZ DEFAULT now()
);
-- lessons_learned: לקחים מהשוואת טיוטות לגרסאות סופיות
CREATE TABLE IF NOT EXISTS lessons_learned (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
lesson_title TEXT NOT NULL, -- "Discussion = continuous essay, no sub-headers"
lesson_text TEXT NOT NULL, -- תיאור מלא
category TEXT DEFAULT '', -- structure/style/content/process
applies_to JSONB DEFAULT '[]', -- ["block-yod", "all"]
source_case TEXT DEFAULT '', -- "הכט 1180-1181"
severity TEXT DEFAULT 'important', -- critical/important/nice-to-have
created_at TIMESTAMPTZ DEFAULT now()
);
-- ═══════════════════════════════════════════════════════════════════
-- Layer 4: Extended RAG
-- ═══════════════════════════════════════════════════════════════════
-- paragraph_embeddings: embeddings של סעיפים בהחלטות
CREATE TABLE IF NOT EXISTS paragraph_embeddings (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE CASCADE,
embedding vector(1024),
created_at TIMESTAMPTZ DEFAULT now()
);
-- case_law_embeddings: embeddings של פסיקה
CREATE TABLE IF NOT EXISTS case_law_embeddings (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
chunk_text TEXT NOT NULL,
embedding vector(1024),
created_at TIMESTAMPTZ DEFAULT now()
);
-- ═══════════════════════════════════════════════════════════════════
-- Indexes
-- ═══════════════════════════════════════════════════════════════════
CREATE INDEX IF NOT EXISTS idx_decisions_case ON decisions(case_id);
CREATE INDEX IF NOT EXISTS idx_decisions_status ON decisions(status);
CREATE INDEX IF NOT EXISTS idx_decision_blocks_decision ON decision_blocks(decision_id);
CREATE INDEX IF NOT EXISTS idx_decision_blocks_block_id ON decision_blocks(block_id);
CREATE INDEX IF NOT EXISTS idx_decision_paragraphs_block ON decision_paragraphs(block_id);
CREATE INDEX IF NOT EXISTS idx_claims_case ON claims(case_id);
CREATE INDEX IF NOT EXISTS idx_claims_role ON claims(party_role);
CREATE INDEX IF NOT EXISTS idx_case_law_subject ON case_law USING gin(subject_tags);
CREATE INDEX IF NOT EXISTS idx_case_law_citations_decision ON case_law_citations(decision_id);
CREATE INDEX IF NOT EXISTS idx_statutory_provisions_statute ON statutory_provisions(statute_name);
CREATE INDEX IF NOT EXISTS idx_transition_phrases_block ON transition_phrases USING gin(block_types);
CREATE INDEX IF NOT EXISTS idx_lessons_category ON lessons_learned(category);
CREATE INDEX IF NOT EXISTS idx_paragraph_embeddings_vec
ON paragraph_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
CREATE INDEX IF NOT EXISTS idx_case_law_embeddings_vec
ON case_law_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
"""
@@ -139,7 +386,9 @@ async def init_schema() -> None:
async with pool.acquire() as conn:
await conn.execute(SCHEMA_SQL)
await conn.execute(MIGRATIONS_SQL)
logger.info("Database schema initialized")
await conn.execute(SCHEMA_V2_SQL)
await conn.execute(SCHEMA_V3_SQL)
logger.info("Database schema initialized (v1 + v2 + v3)")
# ── Case CRUD ───────────────────────────────────────────────────────
@@ -307,6 +556,134 @@ def _row_to_doc(row: asyncpg.Record) -> dict:
return d
# ── Claims ─────────────────────────────────────────────────────────
async def store_claims(case_id: UUID, claims: list[dict], source_document: str = "") -> int:
    """Store extracted claims for a case and return how many were stored.

    When ``source_document`` is non-empty, claims previously stored for the
    same case and source document are deleted first, so re-running extraction
    replaces them instead of duplicating them.

    The delete and the inserts run inside a single transaction, and all rows
    are built before the database is touched. A malformed claim or a failing
    insert therefore leaves the previously stored claims intact instead of
    wiping them and leaving a partial set behind.

    Args:
        case_id: Case the claims belong to.
        claims: Claim dicts. Required keys: ``party_role`` and ``claim_text``.
            Optional keys: ``party_name`` (default ``""``) and
            ``claim_index`` (default ``0``).
        source_document: Name of the document the claims were extracted from.

    Returns:
        The number of claims stored.

    Raises:
        KeyError: If a claim dict lacks ``party_role`` or ``claim_text``.
    """
    # Build every row up front: bad input fails here, before any DELETE runs.
    rows = [
        (
            case_id,
            claim["party_role"],
            claim.get("party_name", ""),
            claim["claim_text"],
            claim.get("claim_index", 0),
            source_document,
        )
        for claim in claims
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        # One transaction so "replace" is all-or-nothing.
        async with conn.transaction():
            if source_document:
                await conn.execute(
                    "DELETE FROM claims WHERE case_id = $1 AND source_document = $2",
                    case_id, source_document,
                )
            if rows:
                await conn.executemany(
                    """INSERT INTO claims (case_id, party_role, party_name, claim_text, claim_index, source_document)
                    VALUES ($1, $2, $3, $4, $5, $6)""",
                    rows,
                )
    return len(rows)
async def get_claims(case_id: UUID, party_role: str | None = None) -> list[dict]:
    """Return the claims stored for a case as plain dicts.

    With a truthy ``party_role`` only that party's claims are returned, ordered
    by ``claim_index``. Otherwise all claims of the case are returned, ordered
    by ``party_role`` and then ``claim_index``.
    """
    # Pick the statement and its arguments first, then run a single fetch.
    if party_role:
        query = "SELECT * FROM claims WHERE case_id = $1 AND party_role = $2 ORDER BY claim_index"
        args = (case_id, party_role)
    else:
        query = "SELECT * FROM claims WHERE case_id = $1 ORDER BY party_role, claim_index"
        args = (case_id,)

    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *args)
    return list(map(dict, records))
# ── Decisions ──────────────────────────────────────────────────────
async def create_decision(
    case_id: UUID,
    outcome: str = "",
    outcome_summary: str = "",
    outcome_reasoning: str = "",
    direction_doc: dict | None = None,
) -> dict:
    """Insert a new decision row for a case and return it via ``get_decision``.

    The new row's version is one more than the highest version already stored
    for the case, or 1 when the case has no decision yet. A falsy
    ``direction_doc`` (``None`` or empty) is stored as NULL; anything else is
    serialized to JSON text.
    """
    pool = await get_pool()
    new_id = uuid4()
    async with pool.acquire() as conn:
        # Latest existing version for this case, if any.
        latest = await conn.fetchrow(
            "SELECT id, version FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1",
            case_id,
        )
        if latest:
            next_version = latest["version"] + 1
        else:
            next_version = 1

        direction_json = json.dumps(direction_doc) if direction_doc else None
        await conn.execute(
            """INSERT INTO decisions (id, case_id, version, outcome, outcome_summary,
            outcome_reasoning, direction_doc)
            VALUES ($1, $2, $3, $4, $5, $6, $7)""",
            new_id, case_id, next_version, outcome, outcome_summary,
            outcome_reasoning, direction_json,
        )
    return await get_decision(new_id)
def _decision_row_to_dict(row) -> dict | None:
    """Convert a ``decisions`` row (e.g. an asyncpg record) into a plain dict.

    Returns None when ``row`` is empty/None. ``id`` and ``case_id`` are turned
    into strings (a NULL value stays None rather than becoming the string
    ``"None"``), and the JSONB columns ``direction_doc`` / ``panel_members``
    are decoded when the driver returned them as JSON text.
    """
    if not row:
        return None
    d = dict(row)
    for key in ("id", "case_id"):
        if d[key] is not None:
            d[key] = str(d[key])
    for key in ("direction_doc", "panel_members"):
        if isinstance(d.get(key), str):
            d[key] = json.loads(d[key])
    return d


async def get_decision(decision_id: UUID) -> dict | None:
    """Return the decision with the given id, or None when it does not exist."""
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("SELECT * FROM decisions WHERE id = $1", decision_id)
    return _decision_row_to_dict(row)


async def get_decision_by_case(case_id: UUID) -> dict | None:
    """Get the latest decision for a case.

    "Latest" means the row with the highest ``version``. Returns None when the
    case has no decision.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "SELECT * FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1",
            case_id,
        )
    return _decision_row_to_dict(row)
async def update_decision(decision_id: UUID, **fields) -> None:
    """Update columns of one decision row and bump its ``updated_at``.

    Args:
        decision_id: Primary key of the decision to update.
        **fields: Column name -> new value. ``direction_doc`` and
            ``panel_members`` given as a dict or list are serialized to JSON
            text. With no fields the call is a no-op. If ``updated_at`` is
            passed explicitly, that value is used instead of ``now()``.

    Raises:
        ValueError: If a field name is not a plain ASCII identifier. Column
            names cannot be bound as query parameters and are interpolated
            into the SQL text, so anything else is rejected up front to rule
            out SQL injection through keyword names.
    """
    if not fields:
        return

    # Validate before touching the database: names go straight into SQL text.
    invalid = [name for name in fields if not (name.isascii() and name.isidentifier())]
    if invalid:
        raise ValueError(f"Invalid decision column name(s): {invalid!r}")

    set_clauses = []
    values = []
    # Placeholders start at $2 because $1 is the decision id in the WHERE clause.
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key in ("direction_doc", "panel_members") and isinstance(val, (dict, list)):
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    # Assigning the same column twice is a SQL error, so only auto-bump the
    # timestamp when the caller did not set it explicitly.
    if "updated_at" not in fields:
        set_clauses.append("updated_at = now()")
    sql = f"UPDATE decisions SET {', '.join(set_clauses)} WHERE id = $1"

    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(sql, decision_id, *values)
# ── Chunks & Vectors ───────────────────────────────────────────────
async def store_chunks(
@@ -452,3 +829,104 @@ async def clear_style_patterns() -> None:
pool = await get_pool()
async with pool.acquire() as conn:
await conn.execute("DELETE FROM style_patterns")
# ── Semantic Search (V2 — decision blocks & case law) ─────────────
async def search_similar_paragraphs(
    query_embedding: list[float],
    limit: int = 10,
    block_type: str | None = None,
) -> list[dict]:
    """Find decision paragraphs closest to ``query_embedding``.

    Rows are ordered by the ``<=>`` distance between the stored embedding and
    the query; ``score`` is ``1 - distance``. A truthy ``block_type`` restricts
    results to paragraphs whose block has that ``block_id``. At most ``limit``
    rows are returned, each as a plain dict.
    """
    # $1 = embedding, $2 = limit; optional filters take the following slots.
    args: list = [query_embedding, limit]
    filters = []
    if block_type:
        args.append(block_type)
        filters.append(f"db.block_id = ${len(args)}")
    where = "WHERE " + " AND ".join(filters) if filters else ""

    sql = f"""
    SELECT dp.content, dp.word_count, dp.paragraph_number,
    db.block_id AS block_type, db.title AS block_title,
    c.case_number, c.title AS case_title,
    d.outcome, d.author,
    1 - (pe.embedding <=> $1) AS score
    FROM paragraph_embeddings pe
    JOIN decision_paragraphs dp ON dp.id = pe.paragraph_id
    JOIN decision_blocks db ON db.id = dp.block_id
    JOIN decisions d ON d.id = db.decision_id
    JOIN cases c ON c.id = d.case_id
    {where}
    ORDER BY pe.embedding <=> $1
    LIMIT $2
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return list(map(dict, records))
async def search_similar_case_law(
    query_embedding: list[float],
    limit: int = 5,
) -> list[dict]:
    """Find case-law chunks closest to ``query_embedding``.

    Rows are ordered by the ``<=>`` distance between the chunk embedding and
    the query; ``score`` is ``1 - distance``. ``subject_tags`` is decoded from
    JSON text when the driver returns it as a string. At most ``limit`` rows
    are returned, each as a plain dict.
    """
    sql = """
    SELECT cl.case_number, cl.case_name, cl.court, cl.summary,
    cl.key_quote, cl.subject_tags,
    cle.chunk_text,
    1 - (cle.embedding <=> $1) AS score
    FROM case_law_embeddings cle
    JOIN case_law cl ON cl.id = cle.case_law_id
    ORDER BY cle.embedding <=> $1
    LIMIT $2
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, query_embedding, limit)

    def _decode(record) -> dict:
        # Plain dict copy with subject_tags parsed if it arrived as JSON text.
        item = dict(record)
        tags = item.get("subject_tags")
        if isinstance(tags, str):
            item["subject_tags"] = json.loads(tags)
        return item

    return [_decode(record) for record in records]
async def search_precedents(
    query_embedding: list[float],
    limit: int = 10,
) -> list[dict]:
    """Search decision paragraphs and case law together, best score first.

    Each source is queried for up to ``limit`` hits. The hits are normalized
    into dicts tagged with ``type`` (``"decision_paragraph"`` or
    ``"case_law"``), merged, sorted by descending ``score`` and cut to
    ``limit``. Paragraph content is truncated to 500 characters; case-law
    entries carry the summary as their content.
    """
    paragraph_hits = await search_similar_paragraphs(query_embedding, limit=limit)
    case_law_hits = await search_similar_case_law(query_embedding, limit=limit)

    merged = [
        {
            "type": "decision_paragraph",
            "score": float(hit["score"]),
            "case_number": hit["case_number"],
            "case_title": hit["case_title"],
            "block_type": hit["block_type"],
            "content": hit["content"][:500],
            "author": hit["author"],
        }
        for hit in paragraph_hits
    ]
    merged += [
        {
            "type": "case_law",
            "score": float(hit["score"]),
            "case_number": hit["case_number"],
            "case_name": hit["case_name"],
            "court": hit["court"],
            "content": hit["summary"],
        }
        for hit in case_law_hits
    ]

    # Stable sort: equal scores keep paragraphs ahead of case law.
    ranked = sorted(merged, key=lambda item: item["score"], reverse=True)
    return ranked[:limit]