Merge ui-rewrite into main: methodology + pipeline fixes

Major changes from ui-rewrite branch:
- Decision-writing methodology (decision-methodology.md) based on FJC, Garner, Posner
- 5 source books downloaded and processed (341K words)
- Methodology integrated into block-yod prompt
- All 8 Paperclip agents updated for methodology compliance
- DB schema V4: claim handling, standard of review, precedent hierarchy
- 15 pipeline gaps identified and fixed after test run on case 1130-25
- Negative checks layer added to CEO and QA agents
- HEARTBEAT: wake up the CEO agent on completion and on blocked status
- Flexible claim handling (bundle/skip via chair_directions)

Conflicts resolved: all 5 files use ui-rewrite version (the latest).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 12:42:32 +00:00
121 changed files with 52736 additions and 487 deletions

View File

@@ -20,7 +20,7 @@ from uuid import UUID
from legal_mcp import config
from legal_mcp.services import db, embeddings, claude_session
from legal_mcp.services.lessons import get_content_checklist
from legal_mcp.services.lessons import get_content_checklist, get_methodology_summary
logger = logging.getLogger(__name__)
@@ -201,30 +201,22 @@ BLOCK_PROMPTS = {
## זהו הבלוק הקריטי ביותר — ליבת ההחלטה (ratio decidendi).
## אורך נדרש: **2,000-4,000 מילים לפחות**. זהו הבלוק הארוך ביותר בהחלטה (35-50%).
## מתודולוגיה — CREAC:
1. **C** (Conclusion) — פתח במסקנה: "לאחר שעיינו... מצאנו כי הערר [נדחה/מתקבל]"
2. **R** (Rule) — הצג את הכלל המשפטי הרלוונטי עם ציטוט פסיקה
3. **E** (Explanation) — צטט פסיקה שמסבירה את הכלל (200-600 מילים לכל ציטוט)
4. **A** (Application) — יישם על העובדות הספציפיות של התיק
5. **C** (Conclusion) — מסקנת ביניים
## כללים קריטיים:
- **מסקנה בפתיחה** — לא בסוף
- **מענה פרטני לכל טענה** שהוצגה בבלוק ז — עבור על כל טענה ברשימה והתייחס אליה בנפרד. אל תדלג על שום טענה.
- **ציטוטי פסיקה** — צטט לפחות 3-5 פסקי דין רלוונטיים. כל ציטוט עם שם התיק המלא.
- **ללא כפילות** — הפנה לבלוקים קודמים: "כאמור בסעיף X לעיל"
- **ללא כותרות משנה** (חריג: נושאים נפרדים לחלוטין)
- מספור רציף
{methodology_guidance}
{content_checklist}
## כללים נוספים:
- **ללא כפילות** — הפנה לבלוקים קודמים: "כאמור בסעיף X לעיל"
- **מספור רציף** — המשך מספור מהבלוק הקודם
- מותרות כותרות-משנה כשיש נושאים נפרדים לחלוטין
## כיוון מאושר (חובה):
{direction_context}
## מבנה לפי תוצאה:
{structure_guidance}
## טענות שצריך לענות עליהן (חובה — כל טענה חייבת מענה):
## טענות:
{claims_context}
## חומרי מקור:
@@ -315,12 +307,15 @@ async def write_block(
# Content checklist — tells block-yod WHAT topics to cover
content_checklist = ""
methodology_guidance = ""
if block_id == "block-yod":
content_checklist = get_content_checklist(
appeal_type=case.get("appeal_type", ""),
subject=case.get("subject", ""),
subject_categories=case.get("subject_categories", []),
)
# Methodology guidance — tells block-yod HOW to reason (universal, not case-specific)
methodology_guidance = get_methodology_summary()
# Format prompt — per Anthropic long-context best practices:
# Place source documents FIRST (top of prompt), instructions LAST.
@@ -336,6 +331,7 @@ async def write_block(
discussion_context=discussion_context,
structure_guidance=structure_guidance,
content_checklist=content_checklist,
methodology_guidance=methodology_guidance,
)
# Restructure: sources first, then instructions
@@ -431,7 +427,7 @@ async def _build_claims_context(case_id: UUID) -> str:
lines.append(f"\n### {role_heb.get(current_role, current_role)}")
claim_num += 1
lines.append(f"טענה #{claim_num}: {c['claim_text'][:400]}")
lines.append(f"\n**סה\"כ {claim_num} טענות — חובה לענות על כל אחת.**")
lines.append(f"\n**סה\"כ {claim_num} טענות. ענה על כל טענה מהותית; טענות [bundle] — אגד; טענות [skip] — ציון קצר בלבד.**")
return "\n".join(lines)
@@ -489,17 +485,12 @@ async def _build_precedents_context(case_id: UUID, block_id: str) -> str:
case = await db.get_case(case_id)
case_number = case.get("case_number", "") if case else ""
subject = case.get("subject", "") if case else ""
practice_area = case.get("practice_area") if case else None
appeal_subtype = case.get("appeal_subtype") if case else None
query = f"דיון משפטי בנושא {subject}" if subject else "דיון משפטי ועדת ערר"
query_emb = await embeddings.embed_query(query)
# Search 1: paragraph_embeddings (from other decisions by Dafna).
# Filter by practice_area + appeal_subtype so we don't pull a
# betterment-levy paragraph when writing a building-permit decision.
# Search 1: paragraph_embeddings (from other decisions by Dafna)
para_results = await db.search_similar_paragraphs(
query_embedding=query_emb, limit=10, block_type="block-yod",
practice_area=practice_area, appeal_subtype=appeal_subtype,
)
# Filter out same case
para_results = [r for r in para_results if r.get("case_number", "") != case_number]
@@ -667,6 +658,17 @@ async def get_block_context(case_id: UUID, block_id: str, instructions: str = ""
outcome = (decision or {}).get("outcome", "rejected")
structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")
# Content checklist + methodology for block-yod
content_checklist = ""
methodology_guidance = ""
if block_id == "block-yod":
content_checklist = get_content_checklist(
appeal_type=case.get("appeal_type", ""),
subject=case.get("subject", ""),
subject_categories=case.get("subject_categories", []),
)
methodology_guidance = get_methodology_summary()
formatted_prompt = prompt_template.format(
case_context=case_context,
source_context=source_context,
@@ -677,6 +679,8 @@ async def get_block_context(case_id: UUID, block_id: str, instructions: str = ""
style_context=style_context,
discussion_context=discussion_context,
structure_guidance=structure_guidance,
content_checklist=content_checklist,
methodology_guidance=methodology_guidance,
)
if instructions:

View File

@@ -200,110 +200,6 @@ CREATE TABLE IF NOT EXISTS appeal_type_rules (
ALTER TABLE decision_blocks ADD COLUMN IF NOT EXISTS image_placeholders JSONB DEFAULT '[]';
"""
# ── Phase 4: Practice area separation (multi-tenant axis) ──────────
SCHEMA_V4_SQL = """
-- ═══════════════════════════════════════════════════════════════════
-- practice_area = top-level legal domain (multi-tenant axis):
-- appeals_committee | national_insurance | labor_law | ...
-- appeal_subtype = refines within practice_area:
-- building_permit | betterment_levy | compensation_197 | unknown
-- Both columns are denormalized to documents/chunks/decisions/style_corpus
-- so vector searches can filter without expensive JOINs.
-- ═══════════════════════════════════════════════════════════════════
ALTER TABLE cases ADD COLUMN IF NOT EXISTS practice_area TEXT;
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_subtype TEXT;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS practice_area TEXT;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS appeal_subtype TEXT;
ALTER TABLE document_chunks ADD COLUMN IF NOT EXISTS practice_area TEXT;
ALTER TABLE document_chunks ADD COLUMN IF NOT EXISTS appeal_subtype TEXT;
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS practice_area TEXT;
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS appeal_subtype TEXT;
ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS practice_area TEXT;
ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS appeal_subtype TEXT;
CREATE INDEX IF NOT EXISTS idx_cases_practice
ON cases(practice_area, appeal_subtype);
CREATE INDEX IF NOT EXISTS idx_chunks_practice
ON document_chunks(practice_area);
CREATE INDEX IF NOT EXISTS idx_corpus_practice
ON style_corpus(practice_area, appeal_subtype);
CREATE INDEX IF NOT EXISTS idx_decisions_practice
ON decisions(practice_area);
-- Backfill (idempotent — only fills NULLs)
UPDATE cases SET practice_area = 'appeals_committee' WHERE practice_area IS NULL;
UPDATE cases SET appeal_subtype = CASE
WHEN case_number ~ '^1[0-9]{3}' THEN 'building_permit'
WHEN case_number ~ '^8[0-9]{3}' THEN 'betterment_levy'
WHEN case_number ~ '^9[0-9]{3}' THEN 'compensation_197'
ELSE 'unknown'
END WHERE appeal_subtype IS NULL;
UPDATE documents d
SET practice_area = c.practice_area, appeal_subtype = c.appeal_subtype
FROM cases c
WHERE d.case_id = c.id AND d.practice_area IS NULL;
UPDATE document_chunks dc
SET practice_area = c.practice_area, appeal_subtype = c.appeal_subtype
FROM cases c
WHERE dc.case_id = c.id AND dc.practice_area IS NULL;
UPDATE decisions de
SET practice_area = c.practice_area, appeal_subtype = c.appeal_subtype
FROM cases c
WHERE de.case_id = c.id AND de.practice_area IS NULL;
-- All existing style_corpus entries are דפנה's appeals-committee decisions
UPDATE style_corpus SET practice_area = 'appeals_committee' WHERE practice_area IS NULL;
-- Training corpus documents/chunks have case_id = NULL. All historical
-- training material is from דפנה's appeals committee, so default them.
UPDATE documents SET practice_area = 'appeals_committee'
WHERE case_id IS NULL AND practice_area IS NULL;
UPDATE document_chunks dc
SET practice_area = d.practice_area, appeal_subtype = d.appeal_subtype
FROM documents d
WHERE dc.document_id = d.id AND dc.practice_area IS NULL;
"""
# ── Phase 5: case_precedents (user-attached legal quotes) ──────────
SCHEMA_V5_SQL = """
-- ═══════════════════════════════════════════════════════════════════
-- case_precedents: legal support the chair attaches to a case / section
-- during the compose phase. Self-contained — quote + citation are
-- stored inline, with an optional FK to an archived PDF in documents.
-- Not linked to case_law (which has UNIQUE(case_number)) to keep the
-- citation as free-text. A backfill pass into case_law is a future
-- follow-up once the UI stabilizes.
-- ═══════════════════════════════════════════════════════════════════
CREATE TABLE IF NOT EXISTS case_precedents (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
case_id UUID NOT NULL REFERENCES cases(id) ON DELETE CASCADE,
section_id TEXT, -- NULL = case-level
-- else "threshold_1" / "issue_3"
quote TEXT NOT NULL,
citation TEXT NOT NULL, -- free-text "מראה מקום"
chair_note TEXT DEFAULT '',
pdf_document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
practice_area TEXT, -- denormalized from cases
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_case_precedents_case
ON case_precedents(case_id, section_id);
CREATE INDEX IF NOT EXISTS idx_case_precedents_library
ON case_precedents(citation);
CREATE INDEX IF NOT EXISTS idx_case_precedents_area
ON case_precedents(practice_area);
"""
# ── Phase 2: Decision + Knowledge + RAG layers ────────────────────
SCHEMA_V2_SQL = """
@@ -501,6 +397,51 @@ CREATE INDEX IF NOT EXISTS idx_case_law_embeddings_vec
"""
# ── Phase 4: Methodology alignment ──────────────────────────────
SCHEMA_V4_SQL = """
-- ═══════════════════════════════════════════════════════════════════
-- V4: Methodology alignment (decision-methodology.md)
-- ═══════════════════════════════════════════════════════════════════
-- claims: טיפול בטענות (bundle/skip) + סוג טענה
ALTER TABLE claims ADD COLUMN IF NOT EXISTS claim_type TEXT DEFAULT 'claim';
-- claim / response / reply
ALTER TABLE claims ADD COLUMN IF NOT EXISTS claim_handling TEXT DEFAULT 'address';
-- address (דיון מלא) / bundle (קיבוץ) / skip (דילוג)
ALTER TABLE claims ADD COLUMN IF NOT EXISTS bundle_group TEXT DEFAULT '';
-- שם הקבוצה לקיבוץ (למשל "פגמים פרוצדורליים")
ALTER TABLE claims ADD COLUMN IF NOT EXISTS handling_reason TEXT DEFAULT '';
-- נימוק לדילוג/קיבוץ (למשל "נבחנה ולא מצאנו ממש")
-- cases: תקן ביקורת + קטגוריות נושא
ALTER TABLE cases ADD COLUMN IF NOT EXISTS standard_of_review TEXT DEFAULT '';
-- "שיקול דעת תכנוני עצמאי" / "בחינת שומה מכרעת" / ...
ALTER TABLE cases ADD COLUMN IF NOT EXISTS subject_categories JSONB DEFAULT '[]';
-- ["חניה", "קווי בניין", "גובה", "שימוש חורג", ...]
-- case_law: רמת תקדים + מעמד
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS precedent_level TEXT DEFAULT '';
-- עליון / מנהלי / ועדת ערר ארצית / ועדת ערר מחוזית
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS is_binding BOOLEAN DEFAULT TRUE;
-- הלכה מחייבת (true) / אמרת אגב (false)
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS creac_role TEXT DEFAULT '';
-- rule (הנחה עליונה) / explanation (הרחבה) / analogy (אנלוגיה)
-- decisions: סדר סוגיות + תקן ביקורת
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS issue_order JSONB DEFAULT '[]';
-- סדר הסוגיות שנקבע ע"י המנצח: [{"title": "...", "type": "threshold/dispositive/secondary"}]
ALTER TABLE decisions ADD COLUMN IF NOT EXISTS claim_handling JSONB DEFAULT '{}';
-- {"overrides": [{"claim_id": "...", "handling": "bundle", "group": "..."}]}
-- indexes
CREATE INDEX IF NOT EXISTS idx_claims_handling ON claims(claim_handling);
CREATE INDEX IF NOT EXISTS idx_claims_type ON claims(claim_type);
CREATE INDEX IF NOT EXISTS idx_case_law_level ON case_law(precedent_level);
"""
async def init_schema() -> None:
pool = await get_pool()
async with pool.acquire() as conn:
@@ -509,8 +450,7 @@ async def init_schema() -> None:
await conn.execute(SCHEMA_V2_SQL)
await conn.execute(SCHEMA_V3_SQL)
await conn.execute(SCHEMA_V4_SQL)
await conn.execute(SCHEMA_V5_SQL)
logger.info("Database schema initialized (v1 + v2 + v3 + v4 + v5)")
logger.info("Database schema initialized (v1 + v2 + v3 + v4)")
# ── Case CRUD ───────────────────────────────────────────────────────
@@ -527,8 +467,6 @@ async def create_case(
hearing_date: date | None = None,
notes: str = "",
expected_outcome: str = "",
practice_area: str = "appeals_committee",
appeal_subtype: str | None = None,
) -> dict:
pool = await get_pool()
case_id = uuid4()
@@ -536,43 +474,17 @@ async def create_case(
await conn.execute(
"""INSERT INTO cases (id, case_number, title, appellants, respondents,
subject, property_address, permit_number, committee_type,
hearing_date, notes, expected_outcome,
practice_area, appeal_subtype)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)""",
hearing_date, notes, expected_outcome)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)""",
case_id, case_number, title,
json.dumps(appellants or []),
json.dumps(respondents or []),
subject, property_address, permit_number, committee_type,
hearing_date, notes, expected_outcome,
practice_area, appeal_subtype,
)
return await get_case(case_id)
async def get_case_practice_area(case_id: UUID) -> tuple[str | None, str | None]:
    """Look up the practice-area pair stored on a case, keyed by its id.

    Returns:
        ``(practice_area, appeal_subtype)`` exactly as stored on the ``cases``
        row, or ``(None, None)`` when no row has the given id.
    """
    sql = "SELECT practice_area, appeal_subtype FROM cases WHERE id = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(sql, case_id)
        if record is not None:
            return record["practice_area"], record["appeal_subtype"]
        # No such case: signal "unknown" for both values.
        return None, None
async def get_case_practice_area_by_number(case_number: str) -> tuple[str | None, str | None]:
    """Look up the practice-area pair of a case identified by its case number.

    Returns:
        ``(practice_area, appeal_subtype)`` from the matching ``cases`` row,
        or ``(None, None)`` when no row has that ``case_number``.
    """
    sql = "SELECT practice_area, appeal_subtype FROM cases WHERE case_number = $1"
    pool = await get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(sql, case_number)
        if record is not None:
            return record["practice_area"], record["appeal_subtype"]
        # Unknown case number: both values are reported as missing.
        return None, None
async def get_case(case_id: UUID) -> dict | None:
pool = await get_pool()
async with pool.acquire() as conn:
@@ -608,16 +520,6 @@ async def list_cases(status: str | None = None, limit: int = 50) -> list[dict]:
return [_row_to_case(r) for r in rows]
async def delete_case(case_id: UUID) -> bool:
    """Remove one case row by primary key.

    Only the ``cases`` row is deleted here. Per the original note on this
    function, dependent rows (documents, document_chunks, qa_results) are
    expected to cascade at schema level and audit_log rows to have their
    case_id nulled -- NOTE(review): the schema is not visible here; confirm.

    Returns:
        True when the command status reports exactly one deleted row.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status = await conn.execute("DELETE FROM cases WHERE id = $1", case_id)
        # The status tag looks like "DELETE <count>"; a trailing " 1" means one row.
        deleted_one = status.endswith(" 1")
        return deleted_one
async def update_case(case_id: UUID, **fields) -> dict | None:
if not fields:
return await get_case(case_id)
@@ -648,34 +550,19 @@ def _row_to_case(row: asyncpg.Record) -> dict:
# ── Document CRUD ───────────────────────────────────────────────────
async def create_document(
case_id: UUID | None,
case_id: UUID,
doc_type: str,
title: str,
file_path: str,
page_count: int | None = None,
practice_area: str | None = None,
appeal_subtype: str | None = None,
) -> dict:
pool = await get_pool()
doc_id = uuid4()
async with pool.acquire() as conn:
# If practice_area not explicitly given, inherit from the parent case
# (for case-bound documents). Training corpus passes case_id=None and
# provides the practice_area directly.
if practice_area is None and case_id is not None:
case_row = await conn.fetchrow(
"SELECT practice_area, appeal_subtype FROM cases WHERE id = $1",
case_id,
)
if case_row:
practice_area = case_row["practice_area"]
appeal_subtype = case_row["appeal_subtype"]
await conn.execute(
"""INSERT INTO documents (id, case_id, doc_type, title, file_path,
page_count, practice_area, appeal_subtype)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)""",
"""INSERT INTO documents (id, case_id, doc_type, title, file_path, page_count)
VALUES ($1, $2, $3, $4, $5, $6)""",
doc_id, case_id, doc_type, title, file_path, page_count,
practice_area, appeal_subtype,
)
row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id)
return _row_to_doc(row)
@@ -731,113 +618,6 @@ def _row_to_doc(row: asyncpg.Record) -> dict:
return d
# ── case_precedents CRUD ───────────────────────────────────────────
def _row_to_precedent(row: asyncpg.Record) -> dict:
d = dict(row)
for k in ("id", "case_id"):
if d.get(k) is not None:
d[k] = str(d[k])
if d.get("pdf_document_id") is not None:
d["pdf_document_id"] = str(d["pdf_document_id"])
for ts in ("created_at", "updated_at"):
if d.get(ts) is not None:
d[ts] = d[ts].isoformat()
return d
async def create_case_precedent(
    case_id: UUID,
    quote: str,
    citation: str,
    section_id: str | None = None,
    chair_note: str = "",
    pdf_document_id: UUID | None = None,
    practice_area: str | None = None,
) -> dict:
    """Attach a new precedent (quote + citation) to a case and return it.

    When ``practice_area`` is not supplied it is copied from the parent
    ``cases`` row, so the value is stored on the precedent itself and the
    cross-case library search can filter on it without a JOIN. If the parent
    case cannot be found, ``practice_area`` stays ``None``.

    Returns:
        The inserted row, converted by ``_row_to_precedent``.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        if practice_area is None:
            parent = await conn.fetchrow(
                "SELECT practice_area FROM cases WHERE id = $1", case_id
            )
            if parent:
                practice_area = parent["practice_area"]

        # RETURNING * hands back the stored row, including DB-side defaults.
        record = await conn.fetchrow(
            """INSERT INTO case_precedents
                   (case_id, section_id, quote, citation, chair_note,
                    pdf_document_id, practice_area)
               VALUES ($1, $2, $3, $4, $5, $6, $7)
               RETURNING *""",
            case_id, section_id, quote, citation, chair_note,
            pdf_document_id, practice_area,
        )
        return _row_to_precedent(record)
async def list_case_precedents(case_id: UUID) -> list[dict]:
    """Return every precedent attached to a case as plain dicts.

    Rows are ordered by ``section_id`` with NULLs first, then by
    ``created_at`` within each section.
    """
    sql = (
        "SELECT * FROM case_precedents WHERE case_id = $1 "
        "ORDER BY section_id NULLS FIRST, created_at"
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(sql, case_id)
        return list(map(_row_to_precedent, records))
async def delete_case_precedent(precedent_id: UUID) -> bool:
    """Delete one attached precedent by id.

    Returns:
        True when the command status reports exactly one deleted row.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status = await conn.execute(
            "DELETE FROM case_precedents WHERE id = $1", precedent_id
        )
        # The status tag looks like "DELETE <count>"; a trailing " 1" means one row.
        deleted_one = status.endswith(" 1")
        return deleted_one
def _escape_like(text: str) -> str:
    """Backslash-escape LIKE/ILIKE metacharacters so *text* matches literally."""
    # Escape the backslash first so the escapes added below are not doubled.
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


async def search_precedent_library(
    query: str, practice_area: str = "", limit: int = 10,
) -> list[dict]:
    """Cross-case typeahead for the citation field.

    Does a case-insensitive substring match of ``query`` against both
    ``citation`` and ``quote`` (plain ILIKE, no embeddings). One row is
    returned per distinct citation -- the most recently created one -- so a
    precedent attached to several cases/sections is listed once.

    ``query`` is matched literally: ``%``, ``_`` and ``\\`` typed by the user
    are escaped instead of acting as wildcards. An empty ``query`` still
    matches every row.

    Args:
        query: Text to look for inside the citation or the quote.
        practice_area: When non-empty, restrict results to this practice area.
        limit: Maximum number of rows to return.

    Returns:
        Dicts with ``id`` (as str), ``citation``, ``quote``, ``chair_note``,
        ``practice_area`` and ``created_at`` (ISO string when set).
    """
    pool = await get_pool()
    pattern = f"%{_escape_like(query)}%"

    # Only fixed SQL fragments are interpolated below; every user-supplied
    # value travels as a bound parameter.
    where = "(citation ILIKE $1 OR quote ILIKE $1)"
    params: list = [pattern, limit]
    if practice_area:
        where = f"practice_area = $3 AND {where}"
        params.append(practice_area)

    sql = f"""SELECT DISTINCT ON (citation)
                  id, citation, quote, chair_note, practice_area, created_at
              FROM case_precedents
              WHERE {where}
              ORDER BY citation, created_at DESC
              LIMIT $2"""

    async with pool.acquire() as conn:
        rows = await conn.fetch(sql, *params)

    out = []
    for r in rows:
        d = dict(r)
        d["id"] = str(d["id"])
        if d.get("created_at"):
            d["created_at"] = d["created_at"].isoformat()
        out.append(d)
    return out
# ── Claims ─────────────────────────────────────────────────────────
async def store_claims(case_id: UUID, claims: list[dict], source_document: str = "") -> int:
@@ -904,20 +684,12 @@ async def create_decision(
)
version = (existing["version"] + 1) if existing else 1
case_row = await conn.fetchrow(
"SELECT practice_area, appeal_subtype FROM cases WHERE id = $1", case_id
)
practice_area = case_row["practice_area"] if case_row else None
appeal_subtype = case_row["appeal_subtype"] if case_row else None
await conn.execute(
"""INSERT INTO decisions (id, case_id, version, outcome, outcome_summary,
outcome_reasoning, direction_doc,
practice_area, appeal_subtype)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
outcome_reasoning, direction_doc)
VALUES ($1, $2, $3, $4, $5, $6, $7)""",
decision_id, case_id, version, outcome, outcome_summary,
outcome_reasoning, json.dumps(direction_doc) if direction_doc else None,
practice_area, appeal_subtype,
)
return await get_decision(decision_id)
@@ -991,37 +763,12 @@ async def store_chunks(
document_id: UUID,
case_id: UUID | None,
chunks: list[dict],
practice_area: str | None = None,
appeal_subtype: str | None = None,
) -> int:
"""Store document chunks with embeddings. Each chunk dict has:
content, section_type, embedding (list[float]), page_number, chunk_index.
practice_area defaults to the parent case's value, or — when case_id is
None (training corpus) — falls back to the parent document's value so
vector search can still filter cleanly.
content, section_type, embedding (list[float]), page_number, chunk_index
"""
pool = await get_pool()
async with pool.acquire() as conn:
# Resolve practice_area in priority order: explicit > case > document.
if practice_area is None:
if case_id is not None:
case_row = await conn.fetchrow(
"SELECT practice_area, appeal_subtype FROM cases WHERE id = $1",
case_id,
)
if case_row:
practice_area = case_row["practice_area"]
appeal_subtype = case_row["appeal_subtype"]
if practice_area is None:
doc_row = await conn.fetchrow(
"SELECT practice_area, appeal_subtype FROM documents WHERE id = $1",
document_id,
)
if doc_row:
practice_area = doc_row["practice_area"]
appeal_subtype = doc_row["appeal_subtype"]
# Delete existing chunks for this document
await conn.execute(
"DELETE FROM document_chunks WHERE document_id = $1", document_id
@@ -1029,16 +776,14 @@ async def store_chunks(
for chunk in chunks:
await conn.execute(
"""INSERT INTO document_chunks
(document_id, case_id, chunk_index, content, section_type,
embedding, page_number, practice_area, appeal_subtype)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
(document_id, case_id, chunk_index, content, section_type, embedding, page_number)
VALUES ($1, $2, $3, $4, $5, $6, $7)""",
document_id, case_id,
chunk["chunk_index"],
chunk["content"],
chunk.get("section_type", "other"),
chunk["embedding"],
chunk.get("page_number"),
practice_area, appeal_subtype,
)
return len(chunks)
@@ -1048,15 +793,8 @@ async def search_similar(
limit: int = 10,
case_id: UUID | None = None,
section_type: str | None = None,
practice_area: str | None = None,
appeal_subtype: str | None = None,
) -> list[dict]:
"""Cosine similarity search on document chunks.
Filter by practice_area to keep precedents from the same legal domain
(e.g. don't surface betterment-levy chunks when working on building
permits). Uses the denormalized column on document_chunks — no JOIN.
"""
"""Cosine similarity search on document chunks."""
pool = await get_pool()
conditions = []
params: list = [query_embedding, limit]
@@ -1070,14 +808,6 @@ async def search_similar(
conditions.append(f"dc.section_type = ${param_idx}")
params.append(section_type)
param_idx += 1
if practice_area:
conditions.append(f"dc.practice_area = ${param_idx}")
params.append(practice_area)
param_idx += 1
if appeal_subtype:
conditions.append(f"dc.appeal_subtype = ${param_idx}")
params.append(appeal_subtype)
param_idx += 1
where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
@@ -1110,8 +840,6 @@ async def add_to_style_corpus(
summary: str = "",
outcome: str = "",
key_principles: list[str] | None = None,
practice_area: str = "appeals_committee",
appeal_subtype: str | None = None,
) -> UUID:
pool = await get_pool()
corpus_id = uuid4()
@@ -1119,13 +847,11 @@ async def add_to_style_corpus(
await conn.execute(
"""INSERT INTO style_corpus
(id, document_id, decision_number, decision_date,
subject_categories, full_text, summary, outcome, key_principles,
practice_area, appeal_subtype)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
subject_categories, full_text, summary, outcome, key_principles)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
corpus_id, document_id, decision_number, decision_date,
json.dumps(subject_categories), full_text, summary, outcome,
json.dumps(key_principles or []),
practice_area, appeal_subtype,
)
return corpus_id
@@ -1229,15 +955,8 @@ async def search_similar_paragraphs(
query_embedding: list[float],
limit: int = 10,
block_type: str | None = None,
practice_area: str | None = None,
appeal_subtype: str | None = None,
) -> list[dict]:
"""Search decision paragraphs by semantic similarity.
Filtering by practice_area uses the denormalized column on `decisions`
so we don't pull, e.g., betterment-levy paragraphs when writing a
building-permit decision.
"""
"""Search decision paragraphs by semantic similarity."""
pool = await get_pool()
conditions = []
params: list = [query_embedding, limit]
@@ -1247,14 +966,6 @@ async def search_similar_paragraphs(
conditions.append(f"db.block_id = ${param_idx}")
params.append(block_type)
param_idx += 1
if practice_area:
conditions.append(f"d.practice_area = ${param_idx}")
params.append(practice_area)
param_idx += 1
if appeal_subtype:
conditions.append(f"d.appeal_subtype = ${param_idx}")
params.append(appeal_subtype)
param_idx += 1
where = f"WHERE {' AND '.join(conditions)}" if conditions else ""

View File

@@ -519,3 +519,65 @@ def get_content_checklist(
# Default: substantive licensing
return CONTENT_CHECKLISTS["licensing_substantive"]
# ── Methodology guidance (condensed from decision-methodology.md) ──
_METHODOLOGY_CORE = """## מתודולוגיה אנליטית — עקרונות מנחים לכתיבת הדיון
### מבנה סילוגיסטי לכל סוגיה
כל סוגיה נבנית כסילוגיזם: (1) הנחה עליונה = הכלל (הוראת תכנית, חוק, הלכה); (2) הנחה תחתונה = העובדות הספציפיות; (3) מסקנה. אם לא ניתן לזהות את הכלל — ההנמקה אינה מספקת. אם לא ניתן לזהות כיצד העובדות מקיימות את הכלל — ההנמקה קריפטית.
### התחל מלשון הטקסט
כשהמקרה נשלט על ידי הוראת תכנית או סעיף חוק — פתח בציטוט ההוראה. פרש מילים במשמעותן הרגילה. תן תוקף לכל מילה. אם יש עמימות — השתמש בכלי פרשנות.
### הפרד ממצא עובדתי ממסקנה משפטית
"הבניה במרחק 1.5 מטרים מגבול המגרש" = ממצא עובדתי. "חריגה זו עולה כדי סטייה ניכרת" = מסקנה משפטית. אל תערבב.
### CREAC לכל סוגיה
1. מסקנה — פתח בתשובה ("הבקשה אינה תואמת...")
2. כלל — ציטוט ההוראה
3. הרחבה — תקדים רלוונטי אחד (אם נדרש)
4. יישום — החלת הכלל על העובדות (לב ההנמקה)
5. מסקנה חוזרת — סגירה תמציתית
### Steel-Man — הצג טענה בחוזקתה לפני דחייה
לפני שדוחים טענה — הצג אותה בגרסה החזקה ביותר: "אמנם צודק העורר כי [נקודה לטובתו], אולם [הנימוק לדחייה]." טענת קש קלה להפריך אך לא משכנעת.
### טכניקת סנדוויץ' לציטוטים
כל ציטוט עטוף: משפט הקדמה (מודיע על התוכן) → ציטוט → ניתוח (מסביר כיצד רלוונטי למקרה). אל תניח שהקורא יקרא ציטוט ארוך ויפיק ממנו מסקנות בעצמו.
### נתונים, לא תיאורים
"הבקשה חורגת ב-1.5 מטרים מקו הבניין" — לא "הבקשה חורגת באופן משמעותי." מספרים, מידות, אחוזים.
### כנות לגבי קושי
כשהמקרה קשה — אמור זאת: "הדבר אינו נקי מספקות, אולם..." אל תעמיד פנים שמקרה קשה הוא קל.
### כל מילה עובדת
"לאחר ששקלנו את כלל השיקולים" — ריק, מחק. מבחן: אם מוחקים את המשפט וההחלטה לא מאבדת מידע — המשפט מיותר.
### איזון ומידתיות (כשהכלל לא נותן תשובה חד-משמעית)
כשנדרש איזון:
1. זהה אינטרסים קונקרטיים (לא "אינטרס הציבור" אלא "שמירה על אופי מגורים צמודי קרקע")
2. בחן השלכות לכל כיוון: מה קורה אם מקבלים? אם דוחים?
3. שקול השלכות מערכתיות: מה הסיגנל שנשלח למערכת?
4. ציין מה מכריע את הכף ולמה
כשמטילים מגבלה/תנאי — מבחן מידתיות: (1) תכלית ראויה?; (2) אמצעי פוגע פחות?; (3) פגיעה מידתית ביחס לתועלת?
### טיפול בטענות
- ההחלטה מנתחת שאלות — לא מתווכחת עם עו"ד. מבנה: שאלה→כלל→עובדות→מסקנה
- טענות שסומנו [bundle] ב-chair_directions: קבץ ודון יחד
- טענות שסומנו [skip] ב-chair_directions: ציון קצר בלבד
- טענות ללא סימון: ענה בנפרד עם מענה מנומק
- טענה מרכזית של הצד המפסיד חייבת מענה Steel-Man
- מיקום ההתמודדות עם טענות נגדיות: באמצע הדיון בסוגיה (לא בהתחלה ולא בסוף)
"""
def get_methodology_summary() -> str:
    """Return the condensed methodology guidance text.

    Takes no arguments and returns the module-level ``_METHODOLOGY_CORE``
    string unchanged, so every caller receives the same, complete text.

    The methodology is meant to be universal (HOW to reason, not WHAT to
    discuss); case-specific topics such as parking, building lines or
    significant deviation belong in the content checklists instead.
    """
    return _METHODOLOGY_CORE