Add outcome-aware drafting, lessons system, and improved style analysis

- Add expected_outcome field to cases (rejection/partial_acceptance/full_acceptance/betterment_levy)
- New lessons.py module with golden ratios, templates, and drafting guidance per outcome type
- Style analyzer now uses Opus with full decision text (no truncation), with multi-pass fallback for large corpora
- Drafting tool provides outcome-specific templates, section guidance, and ratio comments
- Improved JSON extraction with bracket-matching fallback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 18:58:42 +00:00
parent 6f515dc2cb
commit 39089dcef5
6 changed files with 726 additions and 71 deletions
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -129,10 +129,16 @@ CREATE INDEX IF NOT EXISTS idx_cases_number ON cases(case_number);
 """


+# Incremental schema changes, executed by init_schema() after SCHEMA_SQL.
+# The statement uses ADD COLUMN IF NOT EXISTS so that running it again
+# against a database that already has the column does not fail.
+MIGRATIONS_SQL = """
+ALTER TABLE cases ADD COLUMN IF NOT EXISTS expected_outcome TEXT DEFAULT '';
+"""
+
+
 async def init_schema() -> None:
+    # SCHEMA_SQL runs first and MIGRATIONS_SQL second, both on the same
+    # pooled connection.
    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(SCHEMA_SQL)
+        await conn.execute(MIGRATIONS_SQL)
    logger.info("Database schema initialized")


@@ -149,6 +155,7 @@ async def create_case(
    committee_type: str = "ועדה מקומית",
    hearing_date: date | None = None,
    notes: str = "",
+    expected_outcome: str = "",
 ) -> dict:
    pool = await get_pool()
    case_id = uuid4()
@@ -156,13 +163,13 @@ async def create_case(
        await conn.execute(
            """INSERT INTO cases (id, case_number, title, appellants, respondents,
               subject, property_address, permit_number, committee_type,
-               hearing_date, notes)
-               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
+               hearing_date, notes, expected_outcome)
+               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)""",
            case_id, case_number, title,
            json.dumps(appellants or []),
            json.dumps(respondents or []),
            subject, property_address, permit_number, committee_type,
-            hearing_date, notes,
+            hearing_date, notes, expected_outcome,
        )
    return await get_case(case_id)

@@ -438,3 +445,10 @@ async def upsert_style_pattern(
                pattern_type, pattern_text, context,
                json.dumps(examples or []),
            )
+
+
+async def clear_style_patterns() -> None:
+    """Wipe the style_patterns table so a re-analysis starts from empty.
+
+    Issues an unconditional DELETE, removing every stored pattern row.
+    """
+    connection_pool = await get_pool()
+    wipe_statement = "DELETE FROM style_patterns"
+    async with connection_pool.acquire() as connection:
+        await connection.execute(wipe_statement)
--- a/mcp-server/src/legal_mcp/services/lessons.py
+++ b/mcp-server/src/legal_mcp/services/lessons.py
@@ -0,0 +1,331 @@
+"""Lessons learned from comparing AI drafts to Dafna Tamir's final decisions.
+
+Source: /data/uploads/לקחים-לעדכון-שרת-כתיבת-החלטות.md
+Based on analysis of: Hecht 1180-1181 (rejection) and Beit HaKerem 1126/25+1141/25 (partial acceptance).
+"""
+
+from __future__ import annotations
+
+# ── Valid outcome values ────────────────────────────────────────────
+
+VALID_OUTCOMES = ("rejection", "partial_acceptance", "full_acceptance", "betterment_levy")
+
+# ── Golden Ratios (section % of total) ─────────────────────────────
+
+# Maps outcome -> section -> (low, high) percentage range of the decision.
+# NOTE(review): the upper bounds for "betterment_levy" add up to 95
+# (18 + 25 + 48 + 4), so these four sections alone cannot fill 100% of a
+# decision. Confirm whether a section without a ratio (the templates have a
+# respondents' claims section with none) is meant to take the remainder.
+GOLDEN_RATIOS: dict[str, dict[str, tuple[int, int]]] = {
+    "rejection":          {"background": (15, 25), "claims": (30, 40), "discussion": (37, 50), "summary": (2, 9)},
+    "full_acceptance":    {"background": (30, 40), "claims": (20, 30), "discussion": (35, 45), "summary": (3, 5)},
+    "partial_acceptance": {"background": (25, 35), "claims": (25, 30), "discussion": (40, 47), "summary": (2, 3)},
+    "betterment_levy":    {"background": (6, 18),  "claims": (13, 25), "discussion": (32, 48), "summary": (3, 4)},
+}
+
+# ── Paragraph length guidance (word counts) ────────────────────────
+
+PARAGRAPH_LENGTHS = {
+    "claims": (40, 60),
+    "discussion_regular": (40, 80),
+    "discussion_with_citation": (200, 600),
+    "discussion_average": (80, 120),
+}
+
+# ── Transition phrases ─────────────────────────────────────────────
+
+TRANSITION_PHRASES = [
+    # From Hecht (rejection)
+    {"phrase": "ועל מנת לא לצאת בחסר", "context": "פתיחת obiter dicta", "outcome": None},
+    {"phrase": "נציין כי טענות אלו נטענו בלשון רפה", "context": "הכרה בטענות חלשות", "outcome": None},
+    {"phrase": "עינינו הרואות", "context": "סיכום אחרי ציטוט ארוך", "outcome": None},
+    {"phrase": "נוסיף.", "context": "מעבר חד (מילה אחת)", "outcome": None},
+    {"phrase": "אם כך, לעת הזו", "context": "מסקנה מציטוטים", "outcome": None},
+    {"phrase": "למיטב הבנתנו", "context": "עמדה זהירה", "outcome": None},
+    {"phrase": "נשלים ונציין", "context": "נקודה אחרונה לפני סיכום", "outcome": None},
+    # From Beit HaKerem (partial acceptance)
+    {"phrase": "הדברים משליכים על שיקול הדעת ב...", "context": "קישור ממצא למסקנה", "outcome": "partial_acceptance"},
+    {"phrase": "רוצה לומר כי", "context": "הסבר חלופי", "outcome": None},
+    {"phrase": "נוצר מצב בו", "context": "הצגת בעיה", "outcome": None},
+    {"phrase": "לכך נוסיף כי", "context": "הוספת נדבך", "outcome": None},
+    {"phrase": "יש אולי להצר על כך ש...", "context": "ביקורת עדינה", "outcome": None},
+    {"phrase": "עם ההבנה לטענה זו של העוררים, אין בידנו לקבלה", "context": "acknowledge-reject מרוכך", "outcome": None},
+]
+
+# ── Opening strategies by outcome ──────────────────────────────────
+
+OPENING_STRATEGIES = {
+    "rejection": {
+        "style": "broad_contextual",
+        "paragraphs": (5, 8),
+        "description": "פתיחה רחבה — הקשר תכנוני כללי, רקע לפני צלילה לטענות",
+    },
+    "full_acceptance": {
+        "style": "direct_conclusion",
+        "paragraphs": (1, 2),
+        "description": "פתיחה ישירה — ישר למסקנה, תמציתית",
+    },
+    "partial_acceptance": {
+        "style": "tension_mapping",
+        "paragraphs": (3, 6),
+        "description": (
+            "מיפוי מתחים — 1-2 פסקאות על ערך התכנון, "
+            "אחר כך 'בערר דנן עולות שאלות כיצד והאם...' "
+            "עם רשימת 4-6 נקודות מתח בבולטים, "
+            "ואז 'כל הנקודות לעיל עומדות לפנינו...' → מעבר לניתוח"
+        ),
+    },
+    "betterment_levy": {
+        "style": "direct_with_disclaimer",
+        "paragraphs": (1, 3),
+        "description": "פתיחה ישירה עם מסקנה + 'על מנת לא לצאת בחסר'",
+    },
+}
+
+# ── Summary strategies by outcome ──────────────────────────────────
+
+SUMMARY_STRATEGIES = {
+    "rejection": {
+        "heading": "סיכום",
+        "format": "numbered_hebrew_with_warm_closing",
+        "description": "אותיות עבריות (א-ו) עם פירוט נימוקים + פסקת סיום חמה",
+    },
+    "full_acceptance": {
+        "heading": "סוף דבר",
+        "format": "prose_paragraphs",
+        "description": "פרוזה (3-5 פסקאות), ללא פסקה חמה",
+    },
+    "partial_acceptance": {
+        "heading": "סוף דבר",
+        "format": "ultra_minimal_operative",
+        "description": (
+            "אולטרה-מינימלי: 2-3 הוראות אופרטיביות בלבד. "
+            "אפס חזרה על נימוקים. אפס הוצאות. אפס סיום חם. "
+            "כל ההנמקה כבר בדיון — הסיכום = רק מה מתקבל, מה נדחה, ותנאים"
+        ),
+    },
+    "betterment_levy": {
+        "heading": "סיכום",
+        "format": "numbered_hebrew_dry",
+        "description": "אותיות עבריות, סיום יבש ללא פסקה חמה",
+    },
+}
+
+# ── Discussion structure rules ─────────────────────────────────────
+
+DISCUSSION_RULES: dict[str, list[str]] = {
+    "universal": [
+        "פרק הדיון = אסה רציפה. אין כותרות משנה (H2/H3). מעברים רק עם ביטויי מעבר טקסטואליים.",
+        "חריג יחיד לכותרות משנה: נושאים נפרדים לחלוטין (למשל: הקלה בגובה + התייחסות לטענות נוספות).",
+        "טווח אורך סעיפים: 20 עד 600+ מילים. סעיף עם ציטוט מקיף = בלוק אחד שלם, לא שבירה לסעיפים קצרים.",
+    ],
+    "rejection": [
+        "מבנה עיגולים קונצנטריים: שכבות הגנה — סף (ס' 152) → מריט → obiter dicta.",
+        "שאלת הסף (ס' 152) = כלי אסטרטגי, לא חובה. משתמשים בה כשהיא חזקה.",
+    ],
+    "partial_acceptance": [
+        "מבנה: מיפוי מתחים → ניתוח נושא-נושא → הוראות אופרטיביות.",
+        "שאלת הסף (ס' 152) — בדרך כלל מדלגים. כשיש שאלות מהותיות חזקות (חניה, שימור, קווי בניין), דפנה מעדיפה דיון בגוף העניין.",
+        "דפוס 'בית בודד': כשתמ\"א 38 חלה על בית בודד, אינטרס החיזוק מוחלש → שיקול דעת זהיר יותר.",
+        "דפוס 'תכנית אב כמגן': כשקיימת תכנית אב → לצטט אותה → ההיתר 'משתלב עם ראיה כללית'.",
+    ],
+    "full_acceptance": [
+        "מבנה ישיר: נקודות עיקריות → ניתוח → מסקנה.",
+    ],
+    "betterment_levy": [
+        "מבנה ישיר עם מסקנה מוקדמת + 'על מנת לא לצאת בחסר' לנקודות נוספות.",
+    ],
+}
+
+# ── Citation technique ─────────────────────────────────────────────
+
+CITATION_GUIDANCE = (
+    "העדפה לציטוט דרך 'החלטה מרכזת' — החלטה אחת שכבר ריכזה את הפסיקה הרלוונטית. "
+    "דפוס: 'נפנה לניתוח המקיף שערכה ועדת הערר במסגרת ערר [שם]...' → בלוק ציטוט 200-500 מילים → 'אם כך, לעת הזו...'. "
+    "גמישות: כשיש שאלות משפטיות מרובות או חדשניות, כל נושא עשוי לדרוש תקדים נפרד. "
+    "בנושאי חניה/תשתיות — צלילה לעומק: ציטוט ישיר של הוראות תכנית (400+ מילים עם ניתוח שזור)."
+)
+
+# ── Decision templates by outcome ──────────────────────────────────
+
+_HEADER = """# החלטה
+
+## בפני: דפנה תמיר, יו"ר ועדת הערר מחוז ירושלים
+
+**ערר מספר:** {case_number}
+**נושא:** {subject}
+**העוררים:** {appellants}
+**המשיבים:** {respondents}
+**כתובת הנכס:** {property_address}
+
+---
+"""
+
+DECISION_TEMPLATES: dict[str, str] = {
+    "rejection": _HEADER + """## א. רקע עובדתי
+<!-- {ratios_background} -->
+
+[תיאור הרקע העובדתי של הערר]
+
+## ב. טענות העוררים
+<!-- {ratios_claims} -->
+
+[סיכום טענות העוררים]
+
+## ג. טענות המשיבים
+
+[סיכום טענות המשיבים]
+
+## ד. דיון
+<!-- אסה רציפה, ללא כותרות משנה. מבנה עיגולים קונצנטריים: סף → מריט → obiter -->
+<!-- פתיחה רחבה: 5-8 פסקאות הקשר תכנוני -->
+<!-- {ratios_discussion} -->
+
+[ניתוח משפטי — אסה רציפה]
+
+## ה. סיכום
+<!-- אותיות עבריות (א-ו) + פסקת סיום חמה -->
+<!-- {ratios_summary} -->
+
+[סיכום בפורמט רשימה ממוספרת + סיום חם]
+
+---
+ניתנה היום, {date}
+דפנה תמיר, יו"ר ועדת הערר
+""",
+
+    "partial_acceptance": _HEADER + """## א. רקע עובדתי
+<!-- {ratios_background} -->
+
+[תיאור הרקע העובדתי של הערר]
+
+## ב. טענות העוררים
+<!-- {ratios_claims} -->
+
+[סיכום טענות העוררים]
+
+## ג. טענות המשיבים
+
+[סיכום טענות המשיבים]
+
+## ד. דיון
+<!-- אסה רציפה, ללא כותרות משנה. מבנה: מיפוי מתחים → ניתוח נושא-נושא -->
+<!-- פתיחת מיפוי מתחים: 1-2 פסקאות על ערך התכנון, רשימת מתחים, מעבר לניתוח -->
+<!-- {ratios_discussion} -->
+
+[ניתוח משפטי — אסה רציפה]
+
+## ה. סוף דבר
+<!-- אולטרה-מינימלי: 2-3 הוראות אופרטיביות בלבד. אפס נימוקים. אפס הוצאות. -->
+<!-- {ratios_summary} -->
+
+[הוראות אופרטיביות בלבד: מה מתקבל, מה נדחה, תנאים]
+
+---
+ניתנה היום, {date}
+דפנה תמיר, יו"ר ועדת הערר
+""",
+
+    "full_acceptance": _HEADER + """## א. רקע עובדתי
+<!-- {ratios_background} -->
+
+[תיאור הרקע העובדתי של הערר]
+
+## ב. טענות העוררים
+<!-- {ratios_claims} -->
+
+[סיכום טענות העוררים]
+
+## ג. טענות המשיבים
+
+[סיכום טענות המשיבים]
+
+## ד. דיון
+<!-- אסה רציפה, ללא כותרות משנה. מבנה ישיר -->
+<!-- פתיחה ישירה: 1-2 פסקאות, ישר למסקנה -->
+<!-- {ratios_discussion} -->
+
+[ניתוח משפטי — אסה רציפה]
+
+## ה. סוף דבר
+<!-- פרוזה: 3-5 פסקאות, ללא פסקה חמה -->
+<!-- {ratios_summary} -->
+
+[סוף דבר בפרוזה]
+
+---
+ניתנה היום, {date}
+דפנה תמיר, יו"ר ועדת הערר
+""",
+
+    "betterment_levy": _HEADER + """## א. רקע עובדתי
+<!-- {ratios_background} -->
+
+[תיאור הרקע העובדתי של הערר]
+
+## ב. טענות העוררים
+<!-- {ratios_claims} -->
+
+[סיכום טענות העוררים]
+
+## ג. טענות המשיבים
+
+[סיכום טענות המשיבים]
+
+## ד. דיון
+<!-- אסה רציפה, ללא כותרות משנה. מבנה ישיר עם מסקנה מוקדמת -->
+<!-- {ratios_discussion} -->
+
+[ניתוח משפטי — אסה רציפה]
+
+## ה. סיכום
+<!-- אותיות עבריות, סיום יבש -->
+<!-- {ratios_summary} -->
+
+[סיכום בפורמט רשימה ממוספרת, סיום יבש]
+
+---
+ניתנה היום, {date}
+דפנה תמיר, יו"ר ועדת הערר
+""",
+}
+
+
+# ── Helper function ────────────────────────────────────────────────
+
+def get_lessons_for_outcome(outcome: str) -> dict:
+    """Assemble all drafting lessons that apply to one expected outcome.
+
+    Args:
+        outcome: One of the values in VALID_OUTCOMES.
+
+    Returns:
+        A dict with the keys "outcome", "golden_ratios", "opening_strategy",
+        "summary_strategy", "discussion_rules", "citation_guidance",
+        "transition_phrases" and "paragraph_lengths". For an unknown
+        outcome, a dict with a single "error" key is returned instead of
+        raising.
+    """
+    if outcome not in VALID_OUTCOMES:
+        return {"error": f"outcome must be one of: {', '.join(VALID_OUTCOMES)}"}
+
+    ratios = GOLDEN_RATIOS[outcome]
+    # Universal rules come first, followed by the outcome-specific ones.
+    rules = DISCUSSION_RULES.get("universal", []) + DISCUSSION_RULES.get(outcome, [])
+
+    return {
+        "outcome": outcome,
+        "golden_ratios": {
+            k: f"{v[0]}-{v[1]}%" for k, v in ratios.items()
+        },
+        # Copy the strategy dicts so a caller that edits the result cannot
+        # alter the module-level tables shared by every later call.
+        "opening_strategy": dict(OPENING_STRATEGIES[outcome]),
+        "summary_strategy": dict(SUMMARY_STRATEGIES[outcome]),
+        "discussion_rules": rules,
+        "citation_guidance": CITATION_GUIDANCE,
+        # Keep phrases tagged for this outcome plus the untagged (None) ones;
+        # the "outcome" tag itself is not exposed to the caller.
+        "transition_phrases": [
+            {"phrase": p["phrase"], "context": p["context"]}
+            for p in TRANSITION_PHRASES
+            if p["outcome"] is None or p["outcome"] == outcome
+        ],
+        "paragraph_lengths": {
+            k: f"{v[0]}-{v[1]} מילים" for k, v in PARAGRAPH_LENGTHS.items()
+        },
+    }
+
+
+def format_ratios_comment(outcome: str, section: str) -> str:
+    """Return the target-share text for one section of a decision.
+
+    The result is bare text without any ``<!-- -->`` wrapper. An empty
+    string is returned when the outcome or the section has no entry in
+    GOLDEN_RATIOS.
+    """
+    section_bounds = GOLDEN_RATIOS.get(outcome, {}).get(section)
+    if section_bounds is None:
+        return ""
+    low, high = section_bounds
+    return f"יעד: {low}-{high}% מסך ההחלטה"
--- a/mcp-server/src/legal_mcp/services/style_analyzer.py
+++ b/mcp-server/src/legal_mcp/services/style_analyzer.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import json
 import logging
 import re

@@ -12,24 +13,31 @@ from legal_mcp.services import db

 logger = logging.getLogger(__name__)

+# Token budget for Opus 1M context
+MAX_INPUT_TOKENS = 900_000
+# NOTE(review): this divisor only feeds the estimate that chooses between
+# the single-pass and multi-pass paths. Roughly 4 chars/token is the usual
+# rule of thumb for English; Hebrew may need more tokens per character,
+# which would make the estimate too low — confirm against real token counts.
+CHARS_PER_TOKEN = 4  # Hebrew text ratio

 ANALYSIS_PROMPT = """\
-אתה מנתח סגנון כתיבה משפטית. לפניך החלטות משפטיות שנכתבו על ידי אותה יושבת ראש של ועדת ערר.
+אתה מנתח סגנון כתיבה משפטית. לפניך החלטות משפטיות מלאות שנכתבו על ידי אותה יושבת ראש של ועדת ערר.

-נתח את ההחלטות וחלץ את דפוסי הכתיבה הבאים:
+נתח את ההחלטות לעומק וחלץ את דפוסי הכתיבה הבאים:

-1. **נוסחאות פתיחה** (opening_formula) - איך מתחילות ההחלטות
+1. **נוסחאות פתיחה** (opening_formula) - איך מתחילות ההחלטות, מה המבנה של הפסקה הראשונה
 2. **ביטויי מעבר** (transition) - ביטויים שמחברים בין חלקי ההחלטה
-3. **סגנון ציטוט** (citation_style) - איך מצטטים חקיקה ופסיקה
-4. **מבנה ניתוח** (analysis_structure) - איך בנוי הניתוח המשפטי
-5. **נוסחאות סיום** (closing_formula) - איך מסתיימות ההחלטות
-6. **ביטויים אופייניים** (characteristic_phrase) - ביטויים ייחודיים שחוזרים
+3. **סגנון ציטוט** (citation_style) - איך מצטטים חקיקה, פסיקה, פרוטוקולים ומסמכים
+4. **מבנה ניתוח** (analysis_structure) - איך בנוי הניתוח המשפטי, סדר הדיון בטענות
+5. **נוסחאות סיום** (closing_formula) - איך מסתיימות ההחלטות, כולל הוצאות ותאריך
+6. **ביטויים אופייניים** (characteristic_phrase) - ביטויים ייחודיים שחוזרים על פני ההחלטות
+7. **זרימת טיעון** (argument_flow) - איך נבנה טיעון משפטי לאורך ההחלטה, מהצגת הבעיה דרך ניתוח ועד הכרעה
+8. **התייחסות לראיות** (evidence_handling) - איך מתייחסת לראיות, מסמכים, חוות דעת ועדויות

 לכל דפוס, תן:
 - הטקסט המדויק של הדפוס
 - הקשר (באיזה חלק של ההחלטה הוא מופיע)
 - דוגמה מתוך הטקסט

+חשוב: אתה רואה את ההחלטות המלאות. נצל את זה כדי לזהות דפוסים מכל חלקי ההחלטה - כולל אמצע הניתוח המשפטי, לא רק פתיחה וסיום.
+
 החזר את התוצאות בפורמט הבא (JSON array):
 ```json
 [
@@ -46,6 +54,62 @@ ANALYSIS_PROMPT = """\
 {decisions}
 """

+SINGLE_DECISION_PROMPT = """\
+אתה מנתח סגנון כתיבה משפטית. לפניך החלטה משפטית מלאה שנכתבה על ידי יושבת ראש של ועדת ערר.
+
+חלץ את כל דפוסי הכתיבה מההחלטה הזו, כולל:
+1. נוסחאות פתיחה (opening_formula)
+2. ביטויי מעבר (transition)
+3. סגנון ציטוט (citation_style)
+4. מבנה ניתוח (analysis_structure)
+5. נוסחאות סיום (closing_formula)
+6. ביטויים אופייניים (characteristic_phrase)
+7. זרימת טיעון (argument_flow)
+8. התייחסות לראיות (evidence_handling)
+
+לכל דפוס, תן: הטקסט המדויק, הקשר, ודוגמה מתוך הטקסט.
+
+החזר JSON array בפורמט:
+```json
+[
+  {{
+    "type": "opening_formula",
+    "text": "...",
+    "context": "...",
+    "example": "..."
+  }}
+]
+```
+
+ההחלטה:
+{decision}
+"""
+
+SYNTHESIS_PROMPT = """\
+לפניך דפוסי כתיבה שחולצו מ-{num_decisions} החלטות משפטיות של אותה יושבת ראש ועדת ערר.
+
+משימתך:
+1. איחוד דפוסים כפולים או דומים
+2. זיהוי דפוסים שחוזרים על פני מספר החלטות (ציין תדירות)
+3. הבחנה בין דפוסים אופייניים באמת לבין ניסוחים חד-פעמיים
+4. שמירה על המבנה: type, text, context, example
+
+החזר JSON array מאוחד של הדפוסים המשמעותיים ביותר:
+```json
+[
+  {{
+    "type": "opening_formula",
+    "text": "...",
+    "context": "...",
+    "example": "..."
+  }}
+]
+```
+
+הדפוסים שחולצו:
+{patterns}
+"""
+

 async def analyze_corpus() -> dict:
    """Analyze the style corpus and extract/update patterns.
@@ -61,20 +125,34 @@ async def analyze_corpus() -> dict:
    if not rows:
        return {"error": "אין החלטות בקורפוס. העלה החלטות קודמות תחילה."}

-    # Prepare text for analysis
+    # Clear old patterns before re-analysis
+    await db.clear_style_patterns()
+
+    # Calculate token budget
+    total_chars = sum(len(row["full_text"]) for row in rows)
+    estimated_tokens = total_chars // CHARS_PER_TOKEN
+
+    logger.info(
+        "Style analysis: %d decisions, %d chars, ~%d tokens",
+        len(rows), total_chars, estimated_tokens,
+    )
+
+    if estimated_tokens < MAX_INPUT_TOKENS:
+        return await _analyze_single_pass(rows)
+    else:
+        return await _analyze_multi_pass(rows)
+
+
+async def _analyze_single_pass(rows) -> dict:
+    """Send all decisions in a single API call."""
    decisions_text = ""
    for row in rows:
        decisions_text += f"\n\n--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
-        # Limit each decision to ~3000 chars to fit context
-        text = row["full_text"]
-        if len(text) > 3000:
-            text = text[:1500] + "\n...\n" + text[-1500:]
-        decisions_text += text
+        decisions_text += row["full_text"]

-    # Call Claude to analyze patterns
    client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
    message = client.messages.create(
-        model="claude-sonnet-4-6",
+        model="claude-opus-4-6",
        max_tokens=16384,
        messages=[
            {
@@ -84,24 +162,109 @@ async def analyze_corpus() -> dict:
        ],
    )

-    response_text = message.content[0].text
+    return await _parse_and_store_patterns(message.content[0].text, len(rows))

-    # Extract JSON from response - prefer code-block fenced JSON
-    import json
-    code_block = re.search(r"```(?:json)?\s*(\[[\s\S]*?\])\s*```", response_text)
+
+async def _analyze_multi_pass(rows) -> dict:
+    """Analyze each decision on its own, then merge the findings.
+
+    Pass 1 sends every decision separately with SINGLE_DECISION_PROMPT and
+    collects whatever JSON patterns can be extracted from each reply.
+    Pass 2 sends the collected patterns with SYNTHESIS_PROMPT and stores
+    the merged result.
+
+    Args:
+        rows: Corpus records; each must provide "decision_number" and
+            "full_text".
+
+    Returns:
+        The summary dict from _parse_and_store_patterns, or a dict with an
+        "error" key when no decision yielded any pattern.
+    """
+    # Use the async client: this coroutine issues one request per decision,
+    # and the blocking client would stall the event loop for each of them.
+    client = anthropic.AsyncAnthropic(api_key=config.ANTHROPIC_API_KEY)
+    all_patterns = []
+
+    # Pass 1: Analyze each decision individually
+    for row in rows:
+        decision_text = f"--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
+        decision_text += row["full_text"]
+
+        message = await client.messages.create(
+            model="claude-opus-4-6",
+            max_tokens=8192,
+            messages=[
+                {
+                    "role": "user",
+                    "content": SINGLE_DECISION_PROMPT.format(decision=decision_text),
+                }
+            ],
+        )
+
+        # A reply that yields no parseable array is skipped, not fatal.
+        patterns = _extract_json(message.content[0].text)
+        if patterns:
+            all_patterns.extend(patterns)
+
+    if not all_patterns:
+        return {"error": "לא הצלחתי לחלץ דפוסים מההחלטות"}
+
+    # Pass 2: Synthesize across all decisions
+    message = await client.messages.create(
+        model="claude-opus-4-6",
+        max_tokens=16384,
+        messages=[
+            {
+                "role": "user",
+                "content": SYNTHESIS_PROMPT.format(
+                    num_decisions=len(rows),
+                    patterns=json.dumps(all_patterns, ensure_ascii=False, indent=2),
+                ),
+            }
+        ],
+    )
+
+    return await _parse_and_store_patterns(message.content[0].text, len(rows))
+
+
+def _extract_json(response_text: str) -> list | None:
+    """Extract a JSON array from Claude's response text.
+
+    Two strategies are tried in order: the content of the first fenced
+    code block, then bracket matching over the raw text.
+
+    Args:
+        response_text: The full text of the model's reply.
+
+    Returns:
+        The parsed list, or None when no JSON array could be extracted.
+    """
+    # Strategy 1: Extract content between code fences, then parse
+    code_block = re.search(r"```(?:json)?\s*([\s\S]*?)```", response_text)
    if code_block:
-        json_str = code_block.group(1)
-    else:
-        # Fallback: find the last JSON array (skip prose brackets)
-        all_arrays = list(re.finditer(r"\[[\s\S]*?\]", response_text))
-        if not all_arrays:
-            return {"error": "Could not parse analysis results", "raw": response_text}
-        json_str = all_arrays[-1].group()
+        block_content = code_block.group(1).strip()
+        try:
+            result = json.loads(block_content)
+            if isinstance(result, list):
+                return result
+        except json.JSONDecodeError:
+            # Fall through to bracket matching on the whole reply.
+            pass

-    try:
-        patterns = json.loads(json_str)
-    except json.JSONDecodeError as e:
-        return {"error": f"JSON parse error: {e}", "raw": response_text}
+    # Strategy 2: Find a top-level JSON array using bracket matching. When a
+    # bracketed span is not valid JSON (e.g. a "[sic]" in the prose), move on
+    # to the next span instead of giving up on the whole reply.
+    search_from = 0
+    while True:
+        start = response_text.find("[", search_from)
+        if start == -1:
+            return None
+        end = _find_array_end(response_text, start)
+        if end == -1:
+            # Brackets never balance (for example a truncated reply).
+            return None
+        try:
+            return json.loads(response_text[start:end + 1])
+        except json.JSONDecodeError as e:
+            logger.warning("JSON parse error: %s", e)
+            search_from = end + 1
+
+
+def _find_array_end(text: str, start: int) -> int:
+    """Return the index of the "]" that closes the "[" at start, or -1."""
+    depth = 0
+    in_string = False
+    escape_next = False
+    for i in range(start, len(text)):
+        c = text[i]
+        if in_string:
+            # Brackets inside a string literal do not count; a backslash
+            # escapes only the single character that follows it.
+            if escape_next:
+                escape_next = False
+            elif c == "\\":
+                escape_next = True
+            elif c == '"':
+                in_string = False
+            continue
+        if c == '"':
+            in_string = True
+        elif c == "[":
+            depth += 1
+        elif c == "]":
+            depth -= 1
+            if depth == 0:
+                return i
+    return -1
+
+
+async def _parse_and_store_patterns(response_text: str, num_decisions: int) -> dict:
+    """Parse Claude's response and store patterns in the database."""
+    patterns = _extract_json(response_text)
+
+    if patterns is None:
+        return {"error": "Could not parse analysis results", "raw": response_text}

    # Store patterns
    count = 0
@@ -116,6 +279,6 @@ async def analyze_corpus() -> dict:

    return {
        "patterns_found": count,
-        "decisions_analyzed": len(rows),
+        "decisions_analyzed": num_decisions,
        "pattern_types": list({p.get("type") for p in patterns}),
    }