From 0d995483ce7d19fdf834e1e49dd7cface38d4330 Mon Sep 17 00:00:00 2001
From: Chaim <chaim@marcus-law.co.il>
Date: Sat, 6 Jun 2026 17:20:57 +0000
Subject: [PATCH] =?UTF-8?q?feat(style-acq=20T4+T5):=20=D7=A4=D7=A0=D7=A7?=
 =?UTF-8?q?=D7=A1-=D7=94=D7=AA=D7=90=D7=9E=D7=94=20draft=E2=86=94final=20+?=
 =?UTF-8?q?=20=D7=93=D7=99=D7=A1=D7=98=D7=99=D7=9C=D7=A6=D7=99=D7=94=20?=
 =?UTF-8?q?=D7=90=D7=95=D7=98=D7=95=D7=9E=D7=98=D7=99=D7=AA=20=D7=93=D7=A8?=
 =?UTF-8?q?=D7=9A=20=D7=94-curator?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

סוגר את לולאת-הלמידה (INV-LRN4): כל החלטה נסגרת מול הסופי, וכל סופי
מנותח מול הטיוטה. מזין את הטבלאות ש-T15 כבר קורא מהן.

T5 — פנקס-התאמה:
- SCHEMA_V26: טבלת draft_final_pairs (snapshot draft + final + diff + analysis + status).
- db: create/update/list_draft_final_pairs.
- mark-final (app.py): תופס snapshot של הטיוטה (decision_blocks) ברגע החתימה,
  לפני שאפשר לדרוס אותו, ופותח שורת-פנקס (status=final_received).

T4 — דיסטילציה אוטומטית:
- learning_loop.process_final_version: משתמש ב-snapshot (לא בבלוקים שאולי השתנו),
  מסווג style_method↔substance, שומר הצעה ב-pair (status=analyzed).
  **הוסר ה-auto-upsert של style_patterns** — ביטל את ה-bug שדרס את שער-היו"ר
  וזיהם סגנון במהות (INV-LRN1 + INV-LRN5).
- LESSONS_PROMPT: הפרדת style_method↔substance מפורשת + לקח מופשט בלבד.
- curator wake + hermes-curator.md: מריץ ingest_final_version ראשון; מציע רק
  style_method שלא תועד; substance→מסלול precedent.

INV-LRN1 (שער-יו"ר, אין auto-commit) · INV-LRN4 (ניגוד-אמת) · INV-LRN5 (טוהר).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .claude/agents/hermes-curator.md              |  6 +-
 mcp-server/src/legal_mcp/services/db.py       | 90 ++++++++++++++++++-
 .../src/legal_mcp/services/learning_loop.py   | 87 ++++++++++--------
 web/app.py                                    | 21 +++++
 web/paperclip_client.py                       | 13 ++-
 5 files changed, 174 insertions(+), 43 deletions(-)

diff --git a/.claude/agents/hermes-curator.md b/.claude/agents/hermes-curator.md
index a1f9efa..9936874 100644
--- a/.claude/agents/hermes-curator.md
+++ b/.claude/agents/hermes-curator.md
@@ -62,7 +62,11 @@ profiles:
 ## מה אני עושה בכל wake
 
 1. קורא את ה-issue body שב-`{{taskBody}}` — שם התיק + ID של ההחלטה הסופית
-2. משתמש ב-MCP tools של legal-ai:
+2. **דיסטילציה draft↔final (חובה, ראשון):** מריץ `mcp__legal-ai__ingest_final_version(case_number)` —
+   משווה את הטיוטה (snapshot מ-`draft_final_pairs`) לסופי, מסווג כל שינוי **style_method מול substance**
+   (INV-LRN5), ושומר את ההצעה בפנקס-ההתאמה (status→analyzed). זהו אות-הלימוד הקנוני (INV-LRN4).
+   **אל תקבע לקח לבד — זו הצעה לאישור-יו"ר (INV-LRN1).** ההצעות שלי מבוססות על השינויים מסוג style_method.
+3. משתמש ב-MCP tools של legal-ai:
    - `mcp__legal-ai__case_get` — קבלת פרטי תיק (כולל `expected_outcome` — **הסמכות העובדתית** לתוצאה)
    - `mcp__legal-ai__case_get_final_text` — הטקסט המלא של ההחלטה הסופית
    - `mcp__legal-ai__document_list` — רק אם נדרש רשימת מסמכים נוספים של התיק
diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py
index 1859e0c..34ad703 100644
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -1181,6 +1181,29 @@ ALTER TABLE precedent_chunks
     ADD COLUMN IF NOT EXISTS halacha_extracted_at TIMESTAMPTZ;
 """
 
+SCHEMA_V26_SQL = """
+-- draft_final_pairs (T5 / INV-LRN4): the reconciliation ledger.
+-- Every decision is "closed" only after it is compared against the chair's signed
+-- final. Captures an immutable snapshot of the AI draft at mark-final time (before
+-- it can be overwritten), paired with the final. The LLM distillation (curator)
+-- fills final_text + diff_stats + analysis later and advances status.
+CREATE TABLE IF NOT EXISTS draft_final_pairs (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    case_id UUID NOT NULL REFERENCES cases(id) ON DELETE CASCADE,
+    draft_text TEXT NOT NULL DEFAULT '',
+    final_path TEXT DEFAULT '',
+    final_text TEXT DEFAULT '',
+    diff_stats JSONB DEFAULT NULL,
+    analysis JSONB DEFAULT NULL,
+    -- final_received → analyzed → lessons_folded
+    status TEXT NOT NULL DEFAULT 'final_received',
+    created_at TIMESTAMPTZ DEFAULT now(),
+    updated_at TIMESTAMPTZ DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_case ON draft_final_pairs(case_id);
+CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_status ON draft_final_pairs(status);
+"""
+
 
 async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
     async with pool.acquire() as conn:
@@ -1210,7 +1233,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
         await conn.execute(SCHEMA_V23_SQL)
         await conn.execute(SCHEMA_V24_SQL)
         await conn.execute(SCHEMA_V25_SQL)
-    logger.info("Database schema initialized (v1-v25)")
+        await conn.execute(SCHEMA_V26_SQL)
+    logger.info("Database schema initialized (v1-v26)")
 
 
 async def init_schema() -> None:
@@ -2241,6 +2265,70 @@ async def get_recent_decision_lessons(limit: int = 15, practice_area: str = "")
     return [dict(r) for r in rows]
 
 
+async def create_draft_final_pair(case_id: UUID, draft_text: str, final_path: str = "") -> str:
+    """Open a draft/final ledger row at mark-final time (T5 / INV-LRN4) and return its id as a string.
+    Only case_id, draft_text and final_path are written here; status is set to 'final_received'."""
+    insert_sql = (
+        """INSERT INTO draft_final_pairs (case_id, draft_text, final_path, status)
+               VALUES ($1, $2, $3, 'final_received') RETURNING id"""
+    )
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        inserted = await conn.fetchrow(insert_sql, case_id, draft_text, final_path)
+    return str(inserted["id"])
+
+
+async def update_draft_final_pair(
+    pair_id: UUID,
+    final_text: str | None = None,
+    diff_stats: dict | None = None,
+    analysis: dict | None = None,
+    status: str | None = None,
+) -> None:
+    """Write the non-None fields to one draft_final_pairs row (dicts JSON-encoded, cast to jsonb); no-op if all are None."""
+    candidates = (
+        ("final_text", final_text, False),
+        ("diff_stats", diff_stats, True),
+        ("analysis", analysis, True),
+        ("status", status, False),
+    )
+    assignments, values = [], []
+    for column, value, as_json in candidates:
+        if value is not None:
+            values.append(json.dumps(value, ensure_ascii=False) if as_json else value)
+            assignments.append(f"{column} = ${len(values)}" + ("::jsonb" if as_json else ""))
+    if not assignments:
+        return
+    assignments.append("updated_at = now()")
+    values.append(pair_id)
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        await conn.execute(f"UPDATE draft_final_pairs SET {', '.join(assignments)} WHERE id = ${len(values)}", *values)
+
+
+async def list_draft_final_pairs(status: str | None = None, limit: int = 200) -> list[dict]:
+    """Return ledger rows joined with their case's number and title, newest first.
+    A truthy status restricts the result to that status (None or an empty string lists
+    every status); at most limit rows are returned, each converted to a plain dict.
+    """
+    if status:
+        query = """SELECT p.id, p.case_id, c.case_number, c.title, p.status,
+                          p.diff_stats, p.created_at, p.updated_at
+                   FROM draft_final_pairs p LEFT JOIN cases c ON c.id = p.case_id
+                   WHERE p.status = $1 ORDER BY p.created_at DESC LIMIT $2"""
+        args = (status, limit)
+    else:
+        query = """SELECT p.id, p.case_id, c.case_number, c.title, p.status,
+                          p.diff_stats, p.created_at, p.updated_at
+                   FROM draft_final_pairs p LEFT JOIN cases c ON c.id = p.case_id
+                   ORDER BY p.created_at DESC LIMIT $1"""
+        args = (limit,)
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        records = await conn.fetch(query, *args)
+    return [dict(record) for record in records]
+
+
 async def upsert_style_pattern(
     pattern_type: str,
     pattern_text: str,
diff --git a/mcp-server/src/legal_mcp/services/learning_loop.py b/mcp-server/src/legal_mcp/services/learning_loop.py
index e529d89..625a3ba 100644
--- a/mcp-server/src/legal_mcp/services/learning_loop.py
+++ b/mcp-server/src/legal_mcp/services/learning_loop.py
@@ -51,26 +51,25 @@ def compute_diff_stats(draft_text: str, final_text: str) -> dict:
     }
 
 
-LESSONS_PROMPT = """אתה מנתח שינויים בהחלטות משפטיות. קיבלת טיוטה (שנוצרה ע"י AI) וגרסה סופית (שעברה עריכת דפנה).
+LESSONS_PROMPT = """אתה מנתח את הפער בין טיוטה (AI) לגרסה סופית שדפנה תמיר חתמה, כדי ללמוד **איך דפנה כותבת ומנתחת** — לא את ההלכה הספציפית.
+
+## הבחנה קריטית (INV-LRN5 — טוהר-הקול):
+לכל שינוי קבע `domain`:
+- **style_method** — *איך* דפנה כותבת/חושבת: ניסוח, קצב, מבנה, תנועות-הנמקה, ביטויי-מעבר, טון, סדר-טיפול. **זה מה שלומדים** (ניתן להכללה לכל תיק).
+- **substance** — תוכן ספציפי-לתיק: הלכה, עובדה, תקדים, מספר. **לא לומדים** (לא ניתן לגרור לתיק אחר).
 
 ## משימה:
-1. זהה את השינויים המהותיים (לא הקלדה/פורמט)
-2. סווג כל שינוי:
-   - expression_change — ביטוי שהוחלף (הצע כלקח לעתיד)
-   - structure_change — שינוי מבני (סדר, חלוקה)
-   - content_addition — תוכן שנוסף (מה חסר?)
-   - content_removal — תוכן שהוסר (מה מיותר?)
-   - tone_change — שינוי טון (רשמי יותר/פחות)
-   - error_fix — תיקון שגיאה עובדתית/משפטית
-3. הסק לקחים שניתן להפעיל בהחלטות עתידיות
+1. זהה שינויים מהותיים (לא הקלדה/פורמט/מספור-אוטומטי).
+2. לכל שינוי: `type` (expression_change / structure_change / content_addition / content_removal / tone_change / reasoning_move / error_fix) + `domain` (style_method / substance).
+3. הסק לקח **מופשט** (על השיטה/הקול, לא על התוכן) — רק עבור style_method.
 
 ## פלט JSON:
 {
   "changes": [
-    {"type": "...", "description": "תיאור השינוי", "draft_text": "...", "final_text": "...", "lesson": "לקח לעתיד"}
+    {"type": "...", "domain": "style_method|substance", "block": "block-yod", "description": "...", "draft_text": "...", "final_text": "...", "lesson": "לקח מופשט (style_method בלבד)"}
   ],
-  "new_expressions": ["ביטוי חדש שדפנה הוסיפה"],
-  "overall_assessment": "הערכה כללית (1-2 משפטים)"
+  "new_expressions": ["ביטוי-מעבר/נוסחה חדשים (style_method בלבד — לא הלכות)"],
+  "overall_assessment": "1-2 משפטים"
 }
 """
 
@@ -114,42 +113,53 @@ async def process_final_version(
     if not decision:
         raise ValueError(f"No decision for case {case_id}")
 
-    # Get draft text (combine all blocks)
+    # Prefer the immutable snapshot captured at mark-final (T5/INV-LRN4); fall back
+    # to the live blocks (which may have been edited after sign-off).
     pool = await db.get_pool()
+    pair_id = None
+    draft_text = ""
     async with pool.acquire() as conn:
-        rows = await conn.fetch(
-            """SELECT content FROM decision_blocks
-               WHERE decision_id = $1 AND word_count > 0
-               ORDER BY block_index""",
-            UUID(decision["id"]),
+        pair = await conn.fetchrow(
+            """SELECT id, draft_text FROM draft_final_pairs
+               WHERE case_id = $1 AND status = 'final_received'
+               ORDER BY created_at DESC LIMIT 1""",
+            case_id,
         )
-    draft_text = "\n\n".join(r["content"] for r in rows if r["content"])
+        if pair:
+            pair_id = pair["id"]
+            draft_text = pair["draft_text"] or ""
+        if not draft_text:
+            rows = await conn.fetch(
+                """SELECT content FROM decision_blocks
+                   WHERE decision_id = $1 AND word_count > 0
+                   ORDER BY block_index""",
+                UUID(decision["id"]),
+            )
+            draft_text = "\n\n".join(r["content"] for r in rows if r["content"])
 
     if not draft_text:
         raise ValueError("No draft content to compare")
 
-    # Compute stats
+    # Compute stats (pure) + AI distillation (style/method vs substance)
     diff_stats = compute_diff_stats(draft_text, final_text)
-
-    # Analyze changes with AI
     analysis = await analyze_changes(draft_text, final_text)
 
-    # Store new expressions as style patterns
-    for expr in analysis.get("new_expressions", []):
-        if expr and len(expr) > 3:
-            await db.upsert_style_pattern(
-                pattern_type="characteristic_phrase",
-                pattern_text=expr,
-                context="למד מגרסה סופית",
-            )
+    # INV-LRN1: do NOT auto-commit learnings into writer-consumed channels.
+    # The distillation is a PROPOSAL stored on the pair; the chair/curator approves
+    # it (→ decision_lessons / appeal_type_rules, surfaced by T15) via the gate.
+    # (Previously this auto-upserted every new_expression as a style_pattern —
+    # that both bypassed the gate and contaminated style with substance. Removed.)
+    if pair_id is not None:
+        await db.update_draft_final_pair(
+            UUID(str(pair_id)),
+            final_text=final_text,
+            diff_stats=diff_stats,
+            analysis=analysis,
+            status="analyzed",
+        )
 
-    # Update decision status
-    await db.update_decision(
-        UUID(decision["id"]),
-        status="final",
-    )
-
-    # Update case status
+    # Update decision + case status
+    await db.update_decision(UUID(decision["id"]), status="final")
     case = await db.get_case(case_id)
     if case:
         await db.update_case(case_id, status="final")
@@ -157,6 +167,7 @@ async def process_final_version(
     return {
         "diff_stats": diff_stats,
         "analysis": analysis,
+        "pair_id": str(pair_id) if pair_id else None,
         "lessons_count": len(analysis.get("changes", [])),
         "new_expressions": len(analysis.get("new_expressions", [])),
     }
diff --git a/web/app.py b/web/app.py
index 305a566..60ceb5a 100644
--- a/web/app.py
+++ b/web/app.py
@@ -3248,6 +3248,27 @@ async def api_mark_final(case_number: str, filename: str):
             UUID(case["id"]),
         )
 
+    # T5/INV-LRN4 — reconciliation ledger: snapshot the AI draft NOW (before any
+    # later edit can overwrite decision_blocks) and open a draft↔final pair. The
+    # LLM distillation (curator) fills final_text/diff_stats/analysis afterwards.
+    pair_id: str | None = None
+    try:
+        decision = await db.get_decision_by_case(UUID(case["id"]))
+        draft_text = ""
+        if decision:
+            async with pool.acquire() as conn:
+                brows = await conn.fetch(
+                    "SELECT content FROM decision_blocks "
+                    "WHERE decision_id = $1 AND word_count > 0 ORDER BY block_index",
+                    UUID(decision["id"]),
+                )
+            draft_text = "\n\n".join(b["content"] for b in brows if b["content"])
+        pair_id = await db.create_draft_final_pair(
+            UUID(case["id"]), draft_text, str(final_path),
+        )
+    except Exception as e:
+        logger.warning("draft_final_pair snapshot failed for %s: %s", case_number, e)
+
     case_dir = config.find_case_dir(case_number)
     if case_dir.exists():
         commit_and_push(case_dir, f"גרסה סופית: {final_name}")
diff --git a/web/paperclip_client.py b/web/paperclip_client.py
index 05d4f73..eaf19b5 100644
--- a/web/paperclip_client.py
+++ b/web/paperclip_client.py
@@ -1083,9 +1083,16 @@ async def wake_curator_for_final(
     description = (
         f"דפנה סימנה את ההחלטה הסופית של תיק {case_number} כסופית.\n"
         f"קובץ סופי: `{final_filename}`\n\n"
-        f"סקור את ההחלטה מול skills/decision/SKILL.md ו-docs/legal-decision-lessons.md.\n"
-        f"חפש 3-5 דפוסי סגנון/דיון שלא תועדו. כתוב comment בעברית, ניטרלי, "
-        f"ממוספר. עדכן את MEMORY.md שלך. סגור את ה-issue (status=done)."
+        f"**שלב 1 — דיסטילציה (חובה, draft↔final):** הרץ "
+        f"`mcp__legal-ai__ingest_final_version(case_number=\"{case_number}\")`. "
+        f"הוא משווה את הטיוטה (snapshot מפנקס-ההתאמה) לסופי, מסווג כל שינוי "
+        f"style_method מול substance (INV-LRN5), ושומר את ההצעה ב-draft_final_pairs "
+        f"(status→analyzed). **אל תקבע לקח לבד — זו הצעה לאישור.**\n"
+        f"**שלב 2 — הצעה:** מתוך השינויים מסוג style_method בלבד, בחר 3-5 דפוסי "
+        f"סגנון/שיטה שלא תועדו ב-skills/decision/SKILL.md / docs/legal-decision-lessons.md / "
+        f"daphna-voice-fingerprint.md (אל תציע מה שכבר שם). כתוב comment בעברית, ניטרלי, ממוספר.\n"
+        f"**שלב 3:** עדכן MEMORY.md, סגור issue (status=done). substance (הלכות/עובדות) — "
+        f"לא נכנס לקול; אם זוהתה הלכה חדשה הפנה למסלול precedent."
     )
     child_resp = await pc_request(
         "POST",
-- 
2.49.1