Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export
New services (11 files):
- classifier.py: auto doc-type classification + party identification (Claude Haiku)
- claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex)
- references_extractor.py: plan/case-law/legislation detection (regex)
- brainstorm.py: direction generation with 2-3 options (Claude Sonnet)
- block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus)
- docx_exporter.py: DOCX export with David font, RTL, headings
- qa_validator.py: 6 QA checks with export blocking on critical failure
- learning_loop.py: draft vs final comparison + lesson extraction
- metrics.py: KPIs dashboard per case and global
- audit.py: action audit log
- cli.py: standalone CLI with 11 commands

Updated pipeline: extract → classify → chunk → embed → store → extract_references
New MCP tools: 29 total (was 16)
New DB tables: audit_log, decisions CRUD, claims CRUD
Config: Infisical support, external service allowlist

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -131,6 +131,253 @@ CREATE INDEX IF NOT EXISTS idx_cases_number ON cases(case_number);
|
||||
|
||||
# Incremental migrations. Every statement uses IF NOT EXISTS, so the script is
# safe to re-run. It adds cases.expected_outcome and creates the audit_log
# table (one row per recorded action) together with its lookup indexes.
MIGRATIONS_SQL = """
ALTER TABLE cases ADD COLUMN IF NOT EXISTS expected_outcome TEXT DEFAULT '';

CREATE TABLE IF NOT EXISTS audit_log (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    action TEXT NOT NULL,
    case_id UUID REFERENCES cases(id) ON DELETE SET NULL,
    document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
    details JSONB DEFAULT '{}',
    actor TEXT DEFAULT 'system',
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_audit_case ON audit_log(case_id);
CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_log(action);
CREATE INDEX IF NOT EXISTS idx_audit_created ON audit_log(created_at DESC);
"""
|
||||
|
||||
# ── Phase 3: Workflow expansion ────────────────────────────────────
|
||||
|
||||
# Phase 3 DDL. It extends tables declared in SCHEMA_V2_SQL (decisions,
# decision_blocks) and the cases table, so it must be executed after them.
# All statements are idempotent (IF NOT EXISTS).
#   - decisions: direction_doc (JSONB), outcome_reasoning
#   - cases: appeal_type
#   - new tables: qa_results, decision_definitions, appeal_type_rules
#   - decision_blocks: image_placeholders (JSONB)
SCHEMA_V3_SQL = """

-- הרחבת decisions עם שדות חדשים
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS direction_doc JSONB DEFAULT NULL;
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS outcome_reasoning TEXT DEFAULT '';

-- הרחבת cases עם appeal_type (אם לא קיים)
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_type TEXT DEFAULT '';

-- טבלת qa_results
CREATE TABLE IF NOT EXISTS qa_results (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    check_name TEXT NOT NULL,
    passed BOOLEAN NOT NULL,
    severity TEXT DEFAULT 'warning',
    errors JSONB DEFAULT '[]',
    details TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_qa_results_decision ON qa_results(decision_id);
CREATE INDEX IF NOT EXISTS idx_qa_results_case ON qa_results(case_id);

-- טבלת decision_definitions (אם לא קיימת)
CREATE TABLE IF NOT EXISTS decision_definitions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    term TEXT NOT NULL,
    definition TEXT NOT NULL,
    block_id TEXT DEFAULT 'block-he',
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_definitions_decision ON decision_definitions(decision_id);

-- טבלת appeal_type_rules (אם לא קיימת)
CREATE TABLE IF NOT EXISTS appeal_type_rules (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    appeal_type TEXT NOT NULL,
    rule_category TEXT NOT NULL,
    rule_key TEXT NOT NULL,
    rule_value JSONB NOT NULL,
    description TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(appeal_type, rule_category, rule_key)
);

-- image_placeholders על decision_blocks
ALTER TABLE decision_blocks ADD COLUMN IF NOT EXISTS image_placeholders JSONB DEFAULT '[]';
"""
|
||||
|
||||
# ── Phase 2: Decision + Knowledge + RAG layers ────────────────────
|
||||
|
||||
# Phase 2 DDL, grouped in three layers plus indexes. All statements are
# idempotent (IF NOT EXISTS).
#   Layer 2 (decision):   decisions, decision_blocks, decision_paragraphs, claims
#   Layer 3 (knowledge):  case_law, case_law_citations, statutory_provisions,
#                         transition_phrases, lessons_learned
#   Layer 4 (RAG):        paragraph_embeddings, case_law_embeddings (vector(1024))
# The final index group covers foreign-key referencing columns that are used in
# joins and ON DELETE CASCADE; PostgreSQL does not index those automatically.
SCHEMA_V2_SQL = """

-- ═══════════════════════════════════════════════════════════════════
-- Layer 2: Decision
-- ═══════════════════════════════════════════════════════════════════

-- decisions: מטאדטה של החלטה (גרסה אחת = רשומה אחת)
CREATE TABLE IF NOT EXISTS decisions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    version INTEGER DEFAULT 1,
    status TEXT DEFAULT 'draft',  -- draft/review/final/published
    outcome TEXT DEFAULT '',  -- rejected/accepted/partial
    outcome_summary TEXT DEFAULT '',  -- תמצית תוצאה (שורה אחת)
    total_paragraphs INTEGER DEFAULT 0,
    total_words INTEGER DEFAULT 0,
    decision_date DATE,
    author TEXT DEFAULT 'דפנה תמיר',
    panel_members JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(case_id, version)
);

-- decision_blocks: 12 בלוקים לפי block-schema.md
CREATE TABLE IF NOT EXISTS decision_blocks (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    block_id TEXT NOT NULL,  -- block-alef, block-bet, ... block-yod-bet
    block_index INTEGER NOT NULL,  -- 1-12
    title TEXT DEFAULT '',  -- כותרת הבלוק (ריק לבלוקים ללא כותרת)
    content TEXT DEFAULT '',  -- תוכן מלא (markdown)
    word_count INTEGER DEFAULT 0,
    weight_percent NUMERIC(5,2) DEFAULT 0,  -- משקל בפועל (%)
    generation_type TEXT DEFAULT '',  -- template-fill/reproduction/paraphrase/...
    model_used TEXT DEFAULT '',  -- sonnet/opus/script
    temperature NUMERIC(3,2) DEFAULT 0,
    status TEXT DEFAULT 'empty',  -- empty/draft/review/final
    notes TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(decision_id, block_id)
);

-- decision_paragraphs: סעיפים בודדים עם מעקב ציטוטים
CREATE TABLE IF NOT EXISTS decision_paragraphs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    block_id UUID REFERENCES decision_blocks(id) ON DELETE CASCADE,
    paragraph_number INTEGER NOT NULL,  -- מספור רציף בתוך ההחלטה
    content TEXT NOT NULL,
    word_count INTEGER DEFAULT 0,
    citations JSONB DEFAULT '[]',  -- [{case_law_id, text, type}]
    cross_references JSONB DEFAULT '[]',  -- הפניות לסעיפים אחרים ["סעיף 5 לעיל"]
    created_at TIMESTAMPTZ DEFAULT now()
);

-- claims: טענות צדדים (בלוק ז)
CREATE TABLE IF NOT EXISTS claims (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_id UUID REFERENCES cases(id) ON DELETE CASCADE,
    party_role TEXT NOT NULL,  -- appellant/respondent/permit_applicant/committee
    party_name TEXT DEFAULT '',
    claim_text TEXT NOT NULL,
    claim_index INTEGER DEFAULT 0,  -- סדר הופעה
    source_document TEXT DEFAULT '',  -- מאיזה מסמך חולצה הטענה
    addressed_in_paragraph INTEGER,  -- באיזה סעיף בדיון נענתה
    created_at TIMESTAMPTZ DEFAULT now()
);

-- ═══════════════════════════════════════════════════════════════════
-- Layer 3: Legal Knowledge
-- ═══════════════════════════════════════════════════════════════════

-- case_law: פסיקה (תקדימים)
CREATE TABLE IF NOT EXISTS case_law (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_number TEXT UNIQUE NOT NULL,  -- עע"מ 3975/22 או ערר 1011-03-25
    case_name TEXT NOT NULL,  -- שם קצר: "ב. קרן-נכסים"
    court TEXT DEFAULT '',  -- בג"ץ / עליון / מנהלי / ועדת ערר
    date DATE,
    subject_tags JSONB DEFAULT '[]',  -- ["proprietary_claims", "parking"]
    summary TEXT DEFAULT '',  -- תמצית 2-3 משפטים
    key_quote TEXT DEFAULT '',  -- ציטוט מרכזי
    full_text TEXT DEFAULT '',  -- טקסט מלא אם זמין
    source_url TEXT DEFAULT '',
    created_at TIMESTAMPTZ DEFAULT now()
);

-- case_law_citations: קשרים בין פסיקה להחלטות שלנו
CREATE TABLE IF NOT EXISTS case_law_citations (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    decision_id UUID REFERENCES decisions(id) ON DELETE CASCADE,
    paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE SET NULL,
    citation_type TEXT DEFAULT 'support',  -- support/distinguish/overrule/obiter
    context_text TEXT DEFAULT '',  -- ההקשר שבו צוטט
    created_at TIMESTAMPTZ DEFAULT now()
);

-- statutory_provisions: חקיקה נפוצה
CREATE TABLE IF NOT EXISTS statutory_provisions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    statute_name TEXT NOT NULL,  -- "חוק התכנון והבנייה"
    section_number TEXT NOT NULL,  -- "152(א)(2)"
    section_title TEXT DEFAULT '',  -- "זכות ערר"
    full_text TEXT DEFAULT '',  -- נוסח הסעיף
    common_usage TEXT DEFAULT '',  -- מתי משתמשים
    subject_tags JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ DEFAULT now(),
    UNIQUE(statute_name, section_number)
);

-- transition_phrases: ביטויי מעבר של דפנה
CREATE TABLE IF NOT EXISTS transition_phrases (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    phrase TEXT UNIQUE NOT NULL,  -- "ועל מנת לא לצאת בחסר"
    usage_context TEXT DEFAULT '',  -- מתי להשתמש
    block_types JSONB DEFAULT '[]',  -- באילו בלוקים: ["block-yod"]
    frequency INTEGER DEFAULT 1,  -- כמה פעמים ראינו
    source_decision TEXT DEFAULT '',  -- מאיזו החלטה
    created_at TIMESTAMPTZ DEFAULT now()
);

-- lessons_learned: לקחים מהשוואת טיוטות לגרסאות סופיות
CREATE TABLE IF NOT EXISTS lessons_learned (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    lesson_title TEXT NOT NULL,  -- "Discussion = continuous essay, no sub-headers"
    lesson_text TEXT NOT NULL,  -- תיאור מלא
    category TEXT DEFAULT '',  -- structure/style/content/process
    applies_to JSONB DEFAULT '[]',  -- ["block-yod", "all"]
    source_case TEXT DEFAULT '',  -- "הכט 1180-1181"
    severity TEXT DEFAULT 'important',  -- critical/important/nice-to-have
    created_at TIMESTAMPTZ DEFAULT now()
);

-- ═══════════════════════════════════════════════════════════════════
-- Layer 4: Extended RAG
-- ═══════════════════════════════════════════════════════════════════

-- paragraph_embeddings: embeddings של סעיפים בהחלטות
CREATE TABLE IF NOT EXISTS paragraph_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    paragraph_id UUID REFERENCES decision_paragraphs(id) ON DELETE CASCADE,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);

-- case_law_embeddings: embeddings של פסיקה
CREATE TABLE IF NOT EXISTS case_law_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
    chunk_text TEXT NOT NULL,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);

-- ═══════════════════════════════════════════════════════════════════
-- Indexes
-- ═══════════════════════════════════════════════════════════════════

CREATE INDEX IF NOT EXISTS idx_decisions_case ON decisions(case_id);
CREATE INDEX IF NOT EXISTS idx_decisions_status ON decisions(status);
CREATE INDEX IF NOT EXISTS idx_decision_blocks_decision ON decision_blocks(decision_id);
CREATE INDEX IF NOT EXISTS idx_decision_blocks_block_id ON decision_blocks(block_id);
CREATE INDEX IF NOT EXISTS idx_decision_paragraphs_block ON decision_paragraphs(block_id);
CREATE INDEX IF NOT EXISTS idx_claims_case ON claims(case_id);
CREATE INDEX IF NOT EXISTS idx_claims_role ON claims(party_role);
CREATE INDEX IF NOT EXISTS idx_case_law_subject ON case_law USING gin(subject_tags);
CREATE INDEX IF NOT EXISTS idx_case_law_citations_decision ON case_law_citations(decision_id);
CREATE INDEX IF NOT EXISTS idx_statutory_provisions_statute ON statutory_provisions(statute_name);
CREATE INDEX IF NOT EXISTS idx_transition_phrases_block ON transition_phrases USING gin(block_types);
CREATE INDEX IF NOT EXISTS idx_lessons_category ON lessons_learned(category);
CREATE INDEX IF NOT EXISTS idx_paragraph_embeddings_vec
    ON paragraph_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
CREATE INDEX IF NOT EXISTS idx_case_law_embeddings_vec
    ON case_law_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);

-- Foreign-key referencing columns used by joins and ON DELETE CASCADE
CREATE INDEX IF NOT EXISTS idx_case_law_citations_case_law ON case_law_citations(case_law_id);
CREATE INDEX IF NOT EXISTS idx_paragraph_embeddings_paragraph ON paragraph_embeddings(paragraph_id);
CREATE INDEX IF NOT EXISTS idx_case_law_embeddings_case_law ON case_law_embeddings(case_law_id);
"""
|
||||
|
||||
|
||||
@@ -139,7 +386,9 @@ async def init_schema() -> None:
|
||||
async with pool.acquire() as conn:
|
||||
await conn.execute(SCHEMA_SQL)
|
||||
await conn.execute(MIGRATIONS_SQL)
|
||||
logger.info("Database schema initialized")
|
||||
await conn.execute(SCHEMA_V2_SQL)
|
||||
await conn.execute(SCHEMA_V3_SQL)
|
||||
logger.info("Database schema initialized (v1 + v2 + v3)")
|
||||
|
||||
|
||||
# ── Case CRUD ───────────────────────────────────────────────────────
|
||||
@@ -307,6 +556,134 @@ def _row_to_doc(row: asyncpg.Record) -> dict:
|
||||
return d
|
||||
|
||||
|
||||
# ── Claims ─────────────────────────────────────────────────────────
|
||||
|
||||
async def store_claims(case_id: UUID, claims: list[dict], source_document: str = "") -> int:
    """Store extracted claims for a case and return how many were given.

    When ``source_document`` is non-empty, claims already stored for the same
    case and source document are deleted first. The delete and the inserts run
    inside one transaction, so a failing insert rolls the delete back instead
    of leaving the case with a missing or half-replaced set of claims.

    Args:
        case_id: Case the claims belong to.
        claims: Claim dicts. ``party_role`` and ``claim_text`` are required;
            ``party_name`` (default ``""``) and ``claim_index`` (default ``0``)
            are optional.
        source_document: Stored with every claim. An empty value skips the
            delete step, so existing claims are kept.

    Returns:
        ``len(claims)``.

    Raises:
        KeyError: If a claim lacks ``party_role`` or ``claim_text``. Raised
            before any database access.
    """
    # Build all rows up front so a malformed claim fails before anything is
    # deleted or written.
    rows = [
        (
            case_id,
            claim["party_role"],
            claim.get("party_name", ""),
            claim["claim_text"],
            claim.get("claim_index", 0),
            source_document,
        )
        for claim in claims
    ]

    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            if source_document:
                await conn.execute(
                    "DELETE FROM claims WHERE case_id = $1 AND source_document = $2",
                    case_id, source_document,
                )
            if rows:
                await conn.executemany(
                    """INSERT INTO claims (case_id, party_role, party_name, claim_text, claim_index, source_document)
                       VALUES ($1, $2, $3, $4, $5, $6)""",
                    rows,
                )
    return len(claims)
|
||||
|
||||
|
||||
async def get_claims(case_id: UUID, party_role: str | None = None) -> list[dict]:
    """Return the claims stored for a case as plain dicts.

    Args:
        case_id: Case whose claims are fetched.
        party_role: When truthy, only claims with this role are returned,
            ordered by ``claim_index``. Otherwise all claims of the case are
            returned, ordered by ``party_role`` and then ``claim_index``.
    """
    if party_role:
        query = "SELECT * FROM claims WHERE case_id = $1 AND party_role = $2 ORDER BY claim_index"
        args = (case_id, party_role)
    else:
        query = "SELECT * FROM claims WHERE case_id = $1 ORDER BY party_role, claim_index"
        args = (case_id,)

    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *args)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
# ── Decisions ──────────────────────────────────────────────────────
|
||||
|
||||
async def create_decision(
    case_id: UUID,
    outcome: str = "",
    outcome_summary: str = "",
    outcome_reasoning: str = "",
    direction_doc: dict | None = None,
) -> dict:
    """Insert a new decision version for a case and return the stored record.

    The new row gets version 1 when the case has no decision yet, otherwise
    the highest existing version plus one. A falsy ``direction_doc`` (``None``
    or an empty dict) is stored as SQL NULL; anything else is JSON-encoded.

    The result is whatever ``get_decision`` returns for the new id.

    NOTE(review): the version is computed with a read followed by an insert,
    without a lock. Two concurrent calls for the same case can pick the same
    version, and the table's UNIQUE(case_id, version) would then reject one.
    """
    pool = await get_pool()
    new_id = uuid4()
    async with pool.acquire() as conn:
        latest = await conn.fetchrow(
            "SELECT id, version FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1",
            case_id,
        )
        if latest:
            next_version = latest["version"] + 1
        else:
            next_version = 1

        direction_json = json.dumps(direction_doc) if direction_doc else None
        await conn.execute(
            """INSERT INTO decisions (id, case_id, version, outcome, outcome_summary,
                                      outcome_reasoning, direction_doc)
               VALUES ($1, $2, $3, $4, $5, $6, $7)""",
            new_id,
            case_id,
            next_version,
            outcome,
            outcome_summary,
            outcome_reasoning,
            direction_json,
        )
    return await get_decision(new_id)
|
||||
|
||||
|
||||
async def get_decision(decision_id: UUID) -> dict | None:
    """Fetch one decision by id, or ``None`` when no such row exists.

    In the returned dict ``id`` and ``case_id`` are converted to strings, and
    ``direction_doc`` / ``panel_members`` are JSON-decoded when the driver
    hands them back as text.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow("SELECT * FROM decisions WHERE id = $1", decision_id)
    if not record:
        return None

    decision = dict(record)
    for uuid_key in ("id", "case_id"):
        decision[uuid_key] = str(decision[uuid_key])
    for json_key in ("direction_doc", "panel_members"):
        raw = decision.get(json_key)
        if isinstance(raw, str):
            decision[json_key] = json.loads(raw)
    return decision
|
||||
|
||||
|
||||
async def get_decision_by_case(case_id: UUID) -> dict | None:
    """Return the highest-version decision of a case, or ``None`` if it has none.

    The row is normalised the same way as in ``get_decision``: ``id`` and
    ``case_id`` become strings, and text-valued ``direction_doc`` /
    ``panel_members`` are JSON-decoded.
    """
    latest_sql = "SELECT * FROM decisions WHERE case_id = $1 ORDER BY version DESC LIMIT 1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(latest_sql, case_id)
    if not record:
        return None

    result = dict(record)
    result["id"] = str(result["id"])
    result["case_id"] = str(result["case_id"])

    doc = result.get("direction_doc")
    if isinstance(doc, str):
        result["direction_doc"] = json.loads(doc)
    members = result.get("panel_members")
    if isinstance(members, str):
        result["panel_members"] = json.loads(members)
    return result
|
||||
|
||||
|
||||
async def update_decision(decision_id: UUID, **fields) -> None:
    """Update columns of one decision row and refresh ``updated_at``.

    Args:
        decision_id: Id of the decision to update.
        **fields: Column name -> new value. ``direction_doc`` and
            ``panel_members`` are JSON-encoded when given as dict or list.
            With no fields the call is a no-op.

    Raises:
        ValueError: If a field name is not a plain ASCII identifier. Column
            names cannot be sent as bound parameters and are interpolated into
            the statement text, so anything else could inject SQL.
    """
    if not fields:
        return

    # Validate before touching the database: names go into the SQL text.
    for key in fields:
        if not (key.isascii() and key.isidentifier()):
            raise ValueError(f"Invalid decision column name: {key!r}")

    pool = await get_pool()
    set_clauses = []
    values = []
    # $1 is reserved for the row id, so value placeholders start at $2.
    for i, (key, val) in enumerate(fields.items(), start=2):
        if key in ("direction_doc", "panel_members") and isinstance(val, (dict, list)):
            val = json.dumps(val)
        set_clauses.append(f"{key} = ${i}")
        values.append(val)
    # An explicit updated_at wins; assigning the column twice is an SQL error.
    if "updated_at" not in fields:
        set_clauses.append("updated_at = now()")

    sql = f"UPDATE decisions SET {', '.join(set_clauses)} WHERE id = $1"
    async with pool.acquire() as conn:
        await conn.execute(sql, decision_id, *values)
|
||||
|
||||
|
||||
# ── Chunks & Vectors ───────────────────────────────────────────────
|
||||
|
||||
async def store_chunks(
|
||||
@@ -452,3 +829,104 @@ async def clear_style_patterns() -> None:
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
await conn.execute("DELETE FROM style_patterns")
|
||||
|
||||
|
||||
# ── Semantic Search (V2 — decision blocks & case law) ─────────────
|
||||
|
||||
async def search_similar_paragraphs(
    query_embedding: list[float],
    limit: int = 10,
    block_type: str | None = None,
) -> list[dict]:
    """Find decision paragraphs closest to an embedding.

    Rows are ordered by the ``<=>`` distance between the stored paragraph
    embedding and ``query_embedding``; ``score`` is ``1 - distance``.

    Args:
        query_embedding: Query vector, bound as parameter ``$1``.
        limit: Maximum number of rows to return.
        block_type: When truthy, only paragraphs whose block has this
            ``block_id`` (for example ``"block-yod"``) are returned.

    Returns:
        One dict per row with paragraph content, word count and number, the
        block id and title, case number and title, decision outcome and
        author, and ``score``.
    """
    pool = await get_pool()

    args: list = [query_embedding, limit]
    filter_clause = ""
    if block_type:
        args.append(block_type)
        # The optional filter is always the third bound parameter.
        filter_clause = f"WHERE db.block_id = ${len(args)}"

    sql = f"""
        SELECT dp.content, dp.word_count, dp.paragraph_number,
               db.block_id AS block_type, db.title AS block_title,
               c.case_number, c.title AS case_title,
               d.outcome, d.author,
               1 - (pe.embedding <=> $1) AS score
        FROM paragraph_embeddings pe
        JOIN decision_paragraphs dp ON dp.id = pe.paragraph_id
        JOIN decision_blocks db ON db.id = dp.block_id
        JOIN decisions d ON d.id = db.decision_id
        JOIN cases c ON c.id = d.case_id
        {filter_clause}
        ORDER BY pe.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, *args)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
async def search_similar_case_law(
    query_embedding: list[float],
    limit: int = 5,
) -> list[dict]:
    """Find case-law chunks closest to an embedding.

    Rows are ordered by the ``<=>`` distance between the stored chunk
    embedding and ``query_embedding``; ``score`` is ``1 - distance``.

    Returns:
        One dict per matching chunk with the case-law metadata, the chunk
        text and ``score``. ``subject_tags`` is JSON-decoded when the driver
        returns it as text.
    """

    def _as_dict(record) -> dict:
        # Decode subject_tags only when it arrives as a JSON string.
        item = dict(record)
        tags = item.get("subject_tags")
        if isinstance(tags, str):
            item["subject_tags"] = json.loads(tags)
        return item

    pool = await get_pool()
    sql = """
        SELECT cl.case_number, cl.case_name, cl.court, cl.summary,
               cl.key_quote, cl.subject_tags,
               cle.chunk_text,
               1 - (cle.embedding <=> $1) AS score
        FROM case_law_embeddings cle
        JOIN case_law cl ON cl.id = cle.case_law_id
        ORDER BY cle.embedding <=> $1
        LIMIT $2
    """
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, query_embedding, limit)
    return [_as_dict(record) for record in records]
|
||||
|
||||
|
||||
async def search_precedents(
    query_embedding: list[float],
    limit: int = 10,
) -> list[dict]:
    """Search decision paragraphs and case law together, best score first.

    Each source is queried for up to ``limit`` hits. The hits are reshaped
    into a common form tagged with ``type`` (``"decision_paragraph"`` or
    ``"case_law"``), merged, sorted by descending ``score`` and cut to
    ``limit`` entries. Paragraph content is truncated to 500 characters;
    case-law entries carry the case summary as ``content``.
    """
    paragraph_hits = await search_similar_paragraphs(query_embedding, limit=limit)
    case_law_hits = await search_similar_case_law(query_embedding, limit=limit)

    merged = [
        {
            "type": "decision_paragraph",
            "score": float(hit["score"]),
            "case_number": hit["case_number"],
            "case_title": hit["case_title"],
            "block_type": hit["block_type"],
            "content": hit["content"][:500],
            "author": hit["author"],
        }
        for hit in paragraph_hits
    ]
    merged += [
        {
            "type": "case_law",
            "score": float(hit["score"]),
            "case_number": hit["case_number"],
            "case_name": hit["case_name"],
            "court": hit["court"],
            "content": hit["summary"],
        }
        for hit in case_law_hits
    ]

    # Stable sort: on equal scores, paragraph hits stay ahead of case law.
    ranked = sorted(merged, key=lambda entry: entry["score"], reverse=True)
    return ranked[:limit]
|
||||
|
||||
Reference in New Issue
Block a user