Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export

New services (11 files): - classifier.py: auto doc-type classification + party identification (Claude Haiku) - claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex) - references_extractor.py: plan/case-law/legislation detection (regex) - brainstorm.py: direction generation with 2-3 options (Claude Sonnet) - block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus) - docx_exporter.py: DOCX export with David font, RTL, headings - qa_validator.py: 6 QA checks with export blocking on critical failure - learning_loop.py: draft vs final comparison + lesson extraction - metrics.py: KPIs dashboard per case and global - audit.py: action audit log - cli.py: standalone CLI with 11 commands Updated pipeline: extract → classify → chunk → embed → store → extract_references New MCP tools: 29 total (was 16) New DB tables: audit_log, decisions CRUD, claims CRUD Config: Infisical support, external service allowlist Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 10:21:47 +00:00
parent df7cc4f5a5
commit d9e5ef0f46
21 changed files with 3957 additions and 14 deletions
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -0,0 +1,573 @@
+"""מנוע כתיבת בלוקים להחלטת ועדת ערר.
+
+מייצר טקסט בפועל לכל בלוק (ה-יב) בהתבסס על:
+- block-schema.md (פרמטרים, constraints, מבנה)
+- SKILL.md (סגנון דפנה)
+- חומרי המקור (מסמכים, טענות, פסיקה)
+- מסמך כיוון (חובה לבלוק י)
+
+בלוקים א-ד ויב = template-fill (ללא AI).
+בלוקים ה-יא = AI generation עם Claude.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from datetime import date
+from uuid import UUID
+
+import anthropic
+
+from legal_mcp import config
+from legal_mcp.services import db, embeddings
+
+logger = logging.getLogger(__name__)
+
+# Cached Anthropic client; stays None until _get_anthropic() is first called.
+_anthropic_client: anthropic.Anthropic | None = None
+
+
+def _get_anthropic() -> anthropic.Anthropic:
+    """Return the module-wide Anthropic client, constructing it on first use.
+
+    The client is built with ``config.ANTHROPIC_API_KEY`` and stored in the
+    module-level ``_anthropic_client`` so later calls reuse the same instance.
+    """
+    global _anthropic_client
+    client = _anthropic_client
+    if client is None:
+        client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
+        _anthropic_client = client
+    return client
+
+
+# ── Block configuration ───────────────────────────────────────────
+
+# Per-block generation settings, keyed by block id. Every entry carries the
+# block's position ("index"), its Hebrew heading ("title"), the generation
+# strategy label ("gen_type"), the sampling temperature ("temp") and a model
+# key ("model"). The key "script" marks the template-filled blocks; the other
+# keys are resolved through MODEL_MAP by write_block. AI-generated blocks also
+# set "max_tokens", the output budget passed to the model.
+BLOCK_CONFIG = {
+    "block-alef": {"index": 1, "title": "כותרת מוסדית", "gen_type": "template-fill", "temp": 0, "model": "script"},
+    "block-bet":  {"index": 2, "title": "הרכב הוועדה", "gen_type": "template-fill", "temp": 0, "model": "script"},
+    "block-gimel":{"index": 3, "title": "צדדים", "gen_type": "template-fill", "temp": 0, "model": "script"},
+    "block-dalet":{"index": 4, "title": "החלטה", "gen_type": "template-fill", "temp": 0, "model": "script"},
+    "block-he":   {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 1024},
+    "block-vav":  {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 4096},
+    "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 4096},
+    "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 2048},
+    "block-tet":  {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 2048},
+    "block-yod":  {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 8192},
+    "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 2048},
+    "block-yod-bet":  {"index": 12, "title": "חתימות", "gen_type": "template-fill", "temp": 0, "model": "script"},
+}
+
+# Resolves the short model keys used in BLOCK_CONFIG to Anthropic model ids.
+# write_block falls back to the "sonnet" entry for a key that is not listed.
+MODEL_MAP = {
+    "sonnet": "claude-sonnet-4-20250514",
+    "opus": "claude-opus-4-20250514",
+}
+
+
+# ── Template blocks (א-ד, יב) ────────────────────────────────────
+
+def write_block_alef(case: dict, decision: dict | None = None) -> str:
+    """Render block alef: the institutional header followed by the appeal number.
+
+    ``decision`` is accepted so all template writers share one signature; it
+    is not read. Raises ``KeyError`` when ``case`` has no ``case_number``.
+    """
+    header_lines = (
+        "מדינת ישראל",
+        "ועדת ערר לתכנון ולבנייה — מחוז ירושלים",
+        "",
+        f"ערר מס' {case['case_number']}",
+    )
+    return "\n".join(header_lines)
+
+
+def write_block_bet(case: dict, decision: dict | None = None) -> str:
+    """Render block bet: the panel composition.
+
+    The chair line is fixed. When ``decision`` carries a non-empty
+    ``panel_members`` sequence, each member follows on its own line.
+    ``case`` is not read.
+    """
+    members = (decision or {}).get("panel_members", [])
+    member_lines = "".join(f"\n{member}" for member in members) if members else ""
+    return 'בפני:\nעו"ד דפנה תמיר, יו"ר' + member_lines
+
+
+def write_block_gimel(case: dict, decision: dict | None = None) -> str:
+    """Render block gimel: appellants, the word "נגד" (versus), respondents.
+
+    Each party is written on its own line. A side whose list is missing,
+    ``None`` or empty is rendered as the "(לא צוין)" (not specified)
+    placeholder. ``decision`` is not read.
+    """
+    placeholder = ["(לא צוין)"]
+    # `or` instead of a .get() default: a key that is present but holds None or
+    # an empty list must also fall back to the placeholder, otherwise the block
+    # renders a blank side (empty list) or join() raises TypeError (None).
+    appellants = "\n".join(case.get("appellants") or placeholder)
+    respondents = "\n".join(case.get("respondents") or placeholder)
+    return f"{appellants}\n\nנגד\n\n{respondents}"
+
+
+def write_block_dalet(case: dict, decision: dict | None = None) -> str:
+    """Render block dalet: the fixed one-word "החלטה" (Decision) heading.
+
+    Neither argument is read; both exist only so all template writers share
+    one signature.
+    """
+    heading = "החלטה"
+    return heading
+
+
+def write_block_yod_bet(case: dict, decision: dict | None = None) -> str:
+    """Render block yod-bet: the closing signature block.
+
+    The date is today's local date formatted DD.MM.YYYY. The text always
+    states that the decision was unanimous ("פה אחד") and is signed by the
+    chair. Neither argument is read.
+    """
+    issued_on = f"{date.today():%d.%m.%Y}"
+    signature_lines = [
+        f"ניתנה היום, {issued_on}, פה אחד.",
+        "",
+        'דפנה תמיר, עו"ד',
+        'יו"ר ועדת הערר',
+    ]
+    return "\n".join(signature_lines)
+
+
+# Dispatch table from block id to its template writer. write_block calls the
+# writer with (case, decision) for these ids and returns without calling a model.
+TEMPLATE_WRITERS = {
+    "block-alef": write_block_alef,
+    "block-bet": write_block_bet,
+    "block-gimel": write_block_gimel,
+    "block-dalet": write_block_dalet,
+    "block-yod-bet": write_block_yod_bet,
+}
+
+
+# ── AI-generated blocks (ה-יא) ───────────────────────────────────
+
+# Hebrew prompt templates for the AI-generated blocks, keyed by block id.
+# write_block fills them with str.format(); the {placeholders} name the context
+# strings it builds (case_context, source_context, claims_context,
+# direction_context, plans_context, precedents_context, style_context,
+# discussion_context, structure_guidance). Because of str.format(), any literal
+# brace added to a template must be doubled.
+BLOCK_PROMPTS = {
+    "block-he": """כתוב את בלוק הפתיחה (בלוק ה) של החלטת ועדת ערר.
+
+## כללים:
+- פתח ב"לפנינו ערר..." או "עניינה של החלטה זו..."
+- הגדר "להלן" מרכזיים: הוועדה המקומית, התכנית/הבקשה, המגרש
+- 1-2 סעיפים בלבד
+- אין ניתוח, אין ערכי שיפוט, אין ציטוטים מצדדים
+- מספור: 1.
+
+## פרטי התיק:
+{case_context}
+
+## חומרי מקור:
+{source_context}""",
+
+    "block-vav": """כתוב את בלוק הרקע העובדתי (בלוק ו, "פתח דבר") של החלטת ועדת ערר.
+
+## כללים קריטיים:
+- **רקע ניטרלי** — עובדות בלבד. אין ציטוטים ישירים מצדדים. אין מילות ערך/שיפוט ("חריג", "חטא", "בעייתי").
+- סדר פנימי: מקרקעין → סביבה → היסטוריה תכנונית → מהות הבקשה → החלטת הוועדה → הגשת הערר
+- סמן מיקומי תמונות: [📷 מיקום GIS], [📷 תשריט]
+- ציטוט מפרוטוקול ועדה מקומית (אם יש) כ-blockquote
+- מספור רציף מהבלוק הקודם
+
+## פרטי התיק:
+{case_context}
+
+## חומרי מקור:
+{source_context}""",
+
+    "block-zayin": """כתוב את בלוק טענות הצדדים (בלוק ז, "תמצית טענות הצדדים") של החלטת ועדת ערר.
+
+## כללים:
+- כל טענה בסעיף נפרד, גוף שלישי ("העורר טוען כי...")
+- סדר קבוע: טענות העוררים → עמדת הוועדה המקומית → עמדת מבקשי ההיתר (אם יש)
+- כותרת: "תמצית טענות הצדדים"
+- נאמנות מוחלטת למקור — לא לשנות, לא לקצר ללא ציון
+- אין ניתוח, אין מסקנות, אין הערכה
+- רק מכתבי טענות מקוריים (לא השלמות טיעון)
+- מספור רציף
+
+## טענות שחולצו:
+{claims_context}
+
+## פרטי התיק:
+{case_context}""",
+
+    "block-chet": """כתוב את בלוק ההליכים (בלוק ח, "ההליכים בפני ועדת הערר") של החלטת ועדת ערר.
+
+## כללים:
+- תיעוד כרונולוגי: דיון → סיור → השלמות טיעון → החלטות ביניים
+- תאריכים מדויקים
+- תוכן כל השלמת טיעון בסעיף נפרד
+- סמן תמונות מסיור: [📷 צילום מסיור]
+- אין ניתוח או הערכה
+- מספור רציף
+
+## פרטי התיק:
+{case_context}
+
+## חומרי מקור:
+{source_context}""",
+
+    "block-tet": """כתוב את בלוק התכניות החלות (בלוק ט) של החלטת ועדת ערר.
+
+## כללים:
+- ציטוט ישיר מהוראות תכנית עם **הדגשה** של מילים מכריעות
+- מבנה הירכי: תכניות ארציות → מחוזיות → מקומיות
+- אין ניתוח מעמיק (→ בלוק י), אין הכרעה בין פרשנויות
+- מספור רציף
+- בלוק אופציונלי — כתוב רק אם יש מורכבות תכנונית
+
+## פרטי התיק:
+{case_context}
+
+## תכניות שזוהו:
+{plans_context}
+
+## חומרי מקור:
+{source_context}""",
+
+    "block-yod": """כתוב את בלוק הדיון וההכרעה (בלוק י) של החלטת ועדת ערר.
+
+## זהו הבלוק הקריטי ביותר — ליבת ההחלטה (ratio decidendi).
+
+## מתודולוגיה — CREAC:
+1. **C** (Conclusion) — פתח במסקנה: "לאחר שעיינו... מצאנו כי הערר [נדחה/מתקבל]"
+2. **R** (Rule) — הצג את הכלל המשפטי הרלוונטי
+3. **E** (Explanation) — צטט פסיקה שמסבירה את הכלל
+4. **A** (Application) — יישם על העובדות הספציפיות
+5. **C** (Conclusion) — מסקנת ביניים
+
+## כללים קריטיים:
+- **מסקנה בפתיחה** — לא בסוף
+- **מענה לכל טענה** שהוצגה בבלוק ז
+- **ללא כפילות** — הפנה לבלוקים קודמים: "כאמור בסעיף X לעיל"
+- **ללא כותרות משנה** (חריג: נושאים נפרדים לחלוטין)
+- ציטוט פסיקה בבלוקים ארוכים (200-600 מילים)
+- מספור רציף
+
+## כיוון מאושר (חובה):
+{direction_context}
+
+## מבנה לפי תוצאה:
+{structure_guidance}
+
+## טענות שצריך לענות עליהן:
+{claims_context}
+
+## חומרי מקור:
+{source_context}
+
+## פסיקה רלוונטית:
+{precedents_context}
+
+## סגנון דפנה:
+{style_context}""",
+
+    "block-yod-alef": """כתוב את בלוק הסיכום (בלוק יא, "סוף דבר") של החלטת ועדת ערר.
+
+## כללים:
+- כותרת: "סוף דבר" או "סיכום"
+- תוצאה ברורה: "הערר נדחה" / "הערר מתקבל" / "הערר מתקבל באופן חלקי"
+- הוראות אופרטיביות חד-משמעיות
+- אין חזרה על נימוקים — ההנמקה כבר בדיון
+- מספור רציף
+
+## מבנה לפי תוצאה:
+- דחייה: "הערר נדחה" + תתי-סעיפים + פסקה חמה (רישוי בלבד)
+- קבלה: "הערר מתקבל בכפוף ל..." + פרוזה
+- קבלה חלקית: "הערר מתקבל באופן חלקי" + 2-3 הוראות אופרטיביות
+
+## כיוון ותוצאה:
+{direction_context}
+
+## בלוקים קודמים (דיון):
+{discussion_context}""",
+}
+
+# Structure guidance for the discussion block, keyed by decision outcome.
+# write_block looks up decision["outcome"] here (using "rejected" when the
+# decision has no outcome key, and "" for an outcome that is not listed) and
+# passes the text to the prompt as {structure_guidance}.
+STRUCTURE_GUIDANCE = {
+    "rejected": "דחייה — שכבות הגנה (concentric circles): טענה ראשית → נדחית, טענה חלופית → נדחית, חיזוק.",
+    "accepted": "קבלה — נימוק-נימוק: כל נימוק = CREAC מלא, בניית שכנוע הדרגתי.",
+    "partial": "קבלה חלקית — מיפוי מתחים: מה מתקבל ולמה, מה נדחה ולמה, איזון.",
+}
+
+
+# Token budget reserved for extended thinking (opus blocks with temp >= 0.3).
+_THINKING_BUDGET_TOKENS = 16000
+
+
+def _request_message(client: anthropic.Anthropic, request: dict):
+    """Send one Messages API request and return the final message (blocking).
+
+    Extended-thinking requests are streamed and collected with
+    ``get_final_message()``: their ``max_tokens`` is large, and Anthropic
+    recommends streaming for long-running requests. Other requests use a plain
+    ``messages.create`` call.
+    """
+    if "thinking" in request:
+        with client.messages.stream(**request) as stream:
+            return stream.get_final_message()
+    return client.messages.create(**request)
+
+
+async def write_block(
+    case_id: UUID,
+    block_id: str,
+    instructions: str = "",
+) -> dict:
+    """Write a single block of the decision.
+
+    Template blocks (those in ``TEMPLATE_WRITERS``) are rendered locally. All
+    other blocks are generated by Claude from the block's prompt template and
+    the context strings built from the case.
+
+    Args:
+        case_id: Id of the case the decision belongs to.
+        block_id: Block identifier (``block-alef``, ``block-he``, ``block-yod``, ...).
+        instructions: Optional extra instructions appended to the prompt.
+
+    Returns:
+        The dict produced by ``_build_result``: block_id, block_index, title,
+        content, word_count, generation_type, model_used, temperature.
+
+    Raises:
+        ValueError: unknown block id, case not found, no prompt template for
+            the block, or ``block-yod`` requested without an approved direction.
+    """
+    import asyncio  # local import: keeps this block self-contained
+
+    if block_id not in BLOCK_CONFIG:
+        raise ValueError(f"Unknown block: {block_id}")
+
+    block_cfg = BLOCK_CONFIG[block_id]
+    case = await db.get_case(case_id)
+    if not case:
+        raise ValueError(f"Case {case_id} not found")
+
+    decision = await db.get_decision_by_case(case_id)
+
+    # Template blocks need no model call.
+    if block_id in TEMPLATE_WRITERS:
+        content = TEMPLATE_WRITERS[block_id](case, decision)
+        return _build_result(block_id, content, block_cfg)
+
+    prompt_template = BLOCK_PROMPTS.get(block_id)
+    if not prompt_template:
+        raise ValueError(f"No prompt template for {block_id}")
+
+    # The discussion block requires an approved direction. Check it before the
+    # context builders run so a rejected request costs no DB/embedding work.
+    if block_id == "block-yod":
+        dir_doc = (decision or {}).get("direction_doc") or {}
+        if not dir_doc.get("approved"):
+            raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")
+
+    # Build every context string; str.format ignores the ones a template omits.
+    case_context = _build_case_context(case, decision)
+    source_context = await _build_source_context(case_id, block_id)
+    claims_context = await _build_claims_context(case_id)
+    direction_context = _build_direction_context(decision)
+    plans_context = await _build_plans_context(case_id)
+    precedents_context = await _build_precedents_context(case_id, block_id)
+    style_context = await _build_style_context()
+    discussion_context = await _build_previous_blocks_context(case_id, decision)
+
+    outcome = (decision or {}).get("outcome", "rejected")
+    structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")
+
+    prompt = prompt_template.format(
+        case_context=case_context,
+        source_context=source_context,
+        claims_context=claims_context,
+        direction_context=direction_context,
+        plans_context=plans_context,
+        precedents_context=precedents_context,
+        style_context=style_context,
+        discussion_context=discussion_context,
+        structure_guidance=structure_guidance,
+    )
+
+    if instructions:
+        prompt += f"\n\n## הנחיות נוספות:\n{instructions}"
+
+    model_key = block_cfg["model"]
+    model = MODEL_MAP.get(model_key, MODEL_MAP["sonnet"])
+    temperature = block_cfg["temp"]
+    max_tokens = block_cfg.get("max_tokens", 4096)
+
+    request: dict = {
+        "model": model,
+        "max_tokens": max_tokens,
+        "messages": [{"role": "user", "content": prompt}],
+    }
+
+    if model_key == "opus" and temperature >= 0.3:
+        # Extended thinking for complex blocks; the API only accepts
+        # temperature 1 while thinking is enabled.
+        request["temperature"] = 1
+        request["thinking"] = {"type": "enabled", "budget_tokens": _THINKING_BUDGET_TOKENS}
+        # Thinking tokens count against max_tokens and the API rejects
+        # budget_tokens >= max_tokens, so add the budget on top of the block's
+        # configured output allowance.
+        request["max_tokens"] = max_tokens + _THINKING_BUDGET_TOKENS
+    else:
+        request["temperature"] = temperature
+
+    # The SDK client is synchronous; run it in a worker thread so a long
+    # generation does not block the event loop.
+    message = await asyncio.to_thread(_request_message, _get_anthropic(), request)
+
+    # Use the first text block of the response (thinking blocks are skipped).
+    content = next((part.text for part in message.content if part.type == "text"), "")
+
+    if not content:
+        logger.warning("Model returned no text for %s (case %s)", block_id, case_id)
+    elif getattr(message, "stop_reason", None) == "max_tokens":
+        logger.warning("Output for %s (case %s) hit max_tokens and may be truncated", block_id, case_id)
+
+    return _build_result(block_id, content, block_cfg)
+
+
+def _build_result(block_id: str, content: str, block_cfg: dict) -> dict:
+    """Package generated text together with the block's configuration metadata.
+
+    ``word_count`` is the number of whitespace-separated tokens in ``content``.
+    The remaining fields are copied from ``block_cfg`` ("index", "title",
+    "gen_type", "model", "temp"); a missing key raises ``KeyError``.
+    """
+    words = content.split()
+    result = {
+        "block_id": block_id,
+        "block_index": block_cfg["index"],
+        "title": block_cfg["title"],
+        "content": content,
+        "word_count": len(words),
+    }
+    result.update(
+        generation_type=block_cfg["gen_type"],
+        model_used=block_cfg["model"],
+        temperature=block_cfg["temp"],
+    )
+    return result
+
+
+# ── Context builders ──────────────────────────────────────────────
+
+def _build_case_context(case: dict, decision: dict | None) -> str:
+    """Format the case's basic details as a Hebrew bullet list for the prompt.
+
+    Fields that are missing or ``None`` are rendered as empty values, so a
+    ``None`` party list no longer raises and a ``None`` scalar is not written
+    into the prompt as the literal text "None". The outcome line shows the
+    Hebrew label for ``decision["outcome"]`` and stays empty for an unknown
+    outcome or when there is no decision. Raises ``KeyError`` when ``case``
+    has no ``case_number``.
+    """
+    outcome_labels = {"rejected": "דחייה", "accepted": "קבלה", "partial": "קבלה חלקית"}
+    outcome = (decision or {}).get("outcome", "")
+    outcome_heb = outcome_labels.get(outcome, "")
+
+    def text(key: str) -> str:
+        # Treat a stored None like a missing value.
+        return case.get(key) or ""
+
+    def names(key: str) -> str:
+        return ", ".join(case.get(key) or [])
+
+    lines = [
+        f"- מספר תיק: {case['case_number']}",
+        f"- כותרת: {text('title')}",
+        f"- עוררים: {names('appellants')}",
+        f"- משיבים: {names('respondents')}",
+        f"- נושא: {text('subject')}",
+        f"- כתובת: {text('property_address')}",
+        f"- סוג ערר: {text('appeal_type')}",
+        f"- תוצאה: {outcome_heb}",
+    ]
+    return "\n".join(lines)
+
+
+async def _build_source_context(case_id: UUID, block_id: str, max_chars: int = 15000) -> str:
+    """Collect excerpts from the case's documents for use in a prompt.
+
+    Documents are taken in the order ``db.list_documents`` returns them. Each
+    contributes at most 3000 characters, and the excerpts together never
+    exceed ``max_chars`` (the last excerpt is shortened to fit). Headers and
+    separators are not counted against the budget. ``block_id`` is currently
+    not used to select documents.
+
+    Returns the excerpts joined by blank lines, each under a
+    "--- title (doc_type) ---" header, or "(אין מסמכים)" when no document
+    has text.
+    """
+    per_doc_limit = 3000
+    sections = []
+    remaining = max_chars
+    for doc in await db.list_documents(case_id):
+        if remaining <= 0:
+            break
+        text = await db.get_document_text(UUID(doc["id"]))
+        if not text:
+            continue
+        # Cap by the remaining budget so the total cannot overshoot max_chars.
+        excerpt = text[:min(per_doc_limit, remaining)]
+        sections.append(f"--- {doc['title']} ({doc['doc_type']}) ---\n{excerpt}")
+        remaining -= len(excerpt)
+    return "\n\n".join(sections) if sections else "(אין מסמכים)"
+
+
+async def _build_claims_context(case_id: UUID) -> str:
+    """Format the extracted claims as a bullet list with a heading per party role.
+
+    A heading is emitted whenever the party role differs from the previous
+    claim's role, so each role gets a single heading only if ``db.get_claims``
+    returns claims grouped by role. Claim text is cut to 300 characters.
+    Returns "(לא חולצו טענות)" when there are no claims.
+    """
+    claims = await db.get_claims(case_id)
+    if not claims:
+        return "(לא חולצו טענות)"
+
+    headings = {
+        "appellant": "טענות העוררים",
+        "respondent": "טענות המשיבים",
+        "committee": "עמדת הוועדה המקומית",
+        "permit_applicant": "עמדת מבקשי ההיתר",
+    }
+    output = []
+    previous_role = ""
+    for claim in claims:
+        role = claim["party_role"]
+        if role != previous_role:
+            previous_role = role
+            # Unknown roles fall back to the raw role value as the heading.
+            output.append(f"\n### {headings.get(role, role)}")
+        output.append(f"- {claim['claim_text'][:300]}")
+    return "\n".join(output)
+
+
+def _build_direction_context(decision: dict | None) -> str:
+    """Summarise the approved direction document as prompt text.
+
+    Returns a placeholder when there is no decision, when the direction
+    document is absent or not approved, or when an approved document has none
+    of the fields read here. Otherwise the lines are, in order: outcome,
+    reasoning, the selected direction (name, its reasoning items, its
+    precedents) and additional notes, each included only when non-empty.
+    """
+    if not decision:
+        return "(לא הוגדר כיוון)"
+    doc = decision.get("direction_doc") or {}
+    if not doc.get("approved"):
+        return "(כיוון לא אושר)"
+
+    lines = []
+    for label, key in (("תוצאה", "outcome_hebrew"), ("נימוק", "reasoning")):
+        value = doc.get(key, "")
+        if value:
+            lines.append(f"{label}: {value}")
+
+    chosen = doc.get("selected_direction")
+    if chosen:
+        lines.append(f"כיוון נבחר: {chosen.get('name', '')}")
+        lines.extend(f"  - {reason}" for reason in chosen.get("reasoning", []))
+        lines.extend(f"  פסיקה: {precedent}" for precedent in chosen.get("precedents", []))
+
+    notes = doc.get("additional_notes", "")
+    if notes:
+        lines.append(f"הערות: {notes}")
+
+    if not lines:
+        return "(אין מסמך כיוון)"
+    return "\n".join(lines)
+
+
+async def _build_plans_context(case_id: UUID) -> str:
+    """List the distinct plan names referenced in the case's document metadata.
+
+    Reads ``metadata["references"]["plans"][*]["plan_name"]`` from every
+    document; metadata stored as a JSON string is decoded first. Missing or
+    empty plan names are ignored.
+
+    Returns one "- name" line per plan, sorted, or "(לא זוהו תכניות)" when no
+    named plan was found.
+    """
+    plan_names = set()
+    for doc in await db.list_documents(case_id):
+        metadata = doc.get("metadata") or {}
+        if isinstance(metadata, str):
+            metadata = json.loads(metadata)
+        refs = metadata.get("references") or {}
+        for plan in refs.get("plans") or []:
+            name = plan.get("plan_name")
+            # Drop empty names here: otherwise a set holding only "" is truthy
+            # and the function returns an empty string instead of the
+            # placeholder, and a None name would make sorted() raise.
+            if name:
+                plan_names.add(name)
+    if not plan_names:
+        return "(לא זוהו תכניות)"
+    return "\n".join(f"- {name}" for name in sorted(plan_names))
+
+
+async def _build_precedents_context(case_id: UUID, block_id: str) -> str:
+    """Find paragraphs from other cases that resemble this case's subject.
+
+    Embeds a query built from the case subject (or a generic query when the
+    subject is empty), fetches the 5 nearest results, drops those belonging to
+    this case and formats up to 3 of the rest, each cut to 400 characters.
+    This is best-effort: any failure is logged as a warning and the
+    "(אין תקדימים)" placeholder is returned. ``block_id`` is not used.
+    """
+    try:
+        case = await db.get_case(case_id)
+        subject = case.get("subject", "") if case else ""
+        if subject:
+            query = f"דיון משפטי בנושא {subject}"
+        else:
+            query = "דיון משפטי ועדת ערר"
+        query_emb = await embeddings.embed_query(query)
+        hits = await db.search_similar(query_embedding=query_emb, limit=5)
+        # Keep only results that come from other cases.
+        own_case = str(case_id)
+        other_cases = [hit for hit in hits if str(hit.get("case_id")) != own_case]
+        if other_cases:
+            return "\n\n".join(
+                f"[{hit.get('case_number', '?')}, {hit.get('section_type', '')}] {hit['content'][:400]}"
+                for hit in other_cases[:3]
+            )
+    except Exception as exc:
+        logger.warning("Failed to fetch precedents: %s", exc)
+    return "(אין תקדימים)"
+
+
+async def _build_style_context() -> str:
+    """Format up to the first 10 stored style patterns as bullet lines.
+
+    Each line reads "- [pattern_type] pattern_text". Returns
+    "(אין דפוסי סגנון)" when ``db.get_style_patterns`` yields nothing.
+    """
+    patterns = await db.get_style_patterns()
+    if not patterns:
+        return "(אין דפוסי סגנון)"
+    return "\n".join(
+        f"- [{pattern['pattern_type']}] {pattern['pattern_text']}"
+        for pattern in patterns[:10]
+    )
+
+
+async def _build_previous_blocks_context(case_id: UUID, decision: dict | None) -> str:
+    """Return the stored, non-empty blocks of the decision as prompt text.
+
+    Every row of ``decision_blocks`` for this decision with ``word_count > 0``
+    is included in ``block_index`` order; the query does not restrict the
+    result to blocks that precede the one being written. Each block is cut to
+    2000 characters and placed under a "### title (block_id)" heading.
+    ``case_id`` is not used. Returns "(אין בלוקים קודמים)" when there is no
+    decision or no such rows.
+    """
+    placeholder = "(אין בלוקים קודמים)"
+    if not decision:
+        return placeholder
+
+    query = """SELECT block_id, title, content, word_count
+               FROM decision_blocks
+               WHERE decision_id = $1 AND word_count > 0
+               ORDER BY block_index"""
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(query, UUID(decision["id"]))
+
+    if not rows:
+        return placeholder
+    return "\n\n".join(
+        f"### {row['title']} ({row['block_id']})\n{row['content'][:2000]}"
+        for row in rows
+    )
+
+
+# ── Store block ───────────────────────────────────────────────────
+
+async def store_block(decision_id: UUID, block_result: dict) -> None:
+    """Save a block to the database (upsert).
+
+    Inserts a ``decision_blocks`` row with status 'draft'. When a row already
+    exists for (decision_id, block_id), its content, word count, generation
+    type, model, temperature, status and ``updated_at`` are overwritten;
+    ``title`` and ``block_index`` of the existing row are left as they are.
+    ``block_result`` is expected to have the keys produced by ``_build_result``.
+    """
+    upsert_sql = """INSERT INTO decision_blocks
+               (decision_id, block_id, block_index, title, content, word_count,
+                generation_type, model_used, temperature, status)
+               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 'draft')
+               ON CONFLICT (decision_id, block_id) DO UPDATE SET
+                content = EXCLUDED.content,
+                word_count = EXCLUDED.word_count,
+                generation_type = EXCLUDED.generation_type,
+                model_used = EXCLUDED.model_used,
+                temperature = EXCLUDED.temperature,
+                status = 'draft',
+                updated_at = now()"""
+    # Order matches the $2..$9 placeholders above.
+    columns = (
+        "block_id",
+        "block_index",
+        "title",
+        "content",
+        "word_count",
+        "generation_type",
+        "model_used",
+        "temperature",
+    )
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        values = [block_result[column] for column in columns]
+        await conn.execute(upsert_sql, decision_id, *values)
+
+
+async def write_and_store_block(
+    case_id: UUID,
+    block_id: str,
+    instructions: str = "",
+) -> dict:
+    """Write a block and save it to the database.
+
+    Looks up the case's decision, creating one when none exists yet, then
+    generates the block with ``write_block`` and persists it with
+    ``store_block``. Returns the block result dict from ``write_block``. Note
+    that the decision is created before the block is written, so it remains
+    even if writing then fails.
+    """
+    existing = await db.get_decision_by_case(case_id)
+    # A case without a decision record gets one on first write.
+    decision = existing if existing else await db.create_decision(case_id=case_id)
+
+    block_result = await write_block(case_id, block_id, instructions)
+    await store_block(UUID(decision["id"]), block_result)
+    return block_result