feat(style-acq T4+T5): פנקס-התאמה draft↔final + דיסטילציה אוטומטית דרך ה-curator

סוגר את לולאת-הלמידה (INV-LRN4): כל החלטה נסגרת מול הסופי, וכל סופי מנותח מול הטיוטה. מזין את הטבלאות ש-T15 כבר קורא מהן.

T5 — פנקס-התאמה:
- SCHEMA_V26: טבלת draft_final_pairs (snapshot draft + final + diff + analysis + status).
- db: create/update/list_draft_final_pairs.
- mark-final (app.py): תופס snapshot של הטיוטה (decision_blocks) ברגע החתימה, לפני שאפשר לדרוס אותו, ופותח שורת-פנקס (status=final_received).

T4 — דיסטילציה אוטומטית:
- learning_loop.process_final_version: משתמש ב-snapshot (לא בבלוקים שאולי השתנו), מסווג style_method↔substance, שומר הצעה ב-pair (status=analyzed). **הוסר ה-auto-upsert של style_patterns** — ביטל את ה-bug שדרס את שער-היו"ר וזיהם סגנון במהות (INV-LRN1 + INV-LRN5).
- LESSONS_PROMPT: הפרדת style_method↔substance מפורשת + לקח מופשט בלבד.
- curator wake + hermes-curator.md: מריץ ingest_final_version ראשון; מציע רק style_method שלא תועד; substance→מסלול precedent.

INV-LRN1 (שער-יו"ר, אין auto-commit) · INV-LRN4 (ניגוד-אמת) · INV-LRN5 (טוהר).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 17:20:57 +00:00
parent 014eb4937e
commit 0d995483ce
5 changed files with 174 additions and 43 deletions
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -1181,6 +1181,29 @@ ALTER TABLE precedent_chunks
    ADD COLUMN IF NOT EXISTS halacha_extracted_at TIMESTAMPTZ;
 """

+SCHEMA_V26_SQL = """
+-- draft_final_pairs (T5 / INV-LRN4): the reconciliation ledger.
+-- Every decision is "closed" only after it is compared against the chair's signed
+-- final. Captures an immutable snapshot of the AI draft at mark-final time (before
+-- it can be overwritten), paired with the final. The LLM distillation (curator)
+-- fills final_text + diff_stats + analysis later and advances status.
+CREATE TABLE IF NOT EXISTS draft_final_pairs (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    case_id UUID NOT NULL REFERENCES cases(id) ON DELETE CASCADE,
+    draft_text TEXT NOT NULL DEFAULT '',
+    final_path TEXT DEFAULT '',
+    final_text TEXT DEFAULT '',
+    diff_stats JSONB DEFAULT NULL,
+    analysis JSONB DEFAULT NULL,
+    -- final_received → analyzed → lessons_folded
+    status TEXT NOT NULL DEFAULT 'final_received',
+    created_at TIMESTAMPTZ DEFAULT now(),
+    updated_at TIMESTAMPTZ DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_case ON draft_final_pairs(case_id);
+CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_status ON draft_final_pairs(status);
+"""
+

 async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
    async with pool.acquire() as conn:
@@ -1210,7 +1233,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
        await conn.execute(SCHEMA_V23_SQL)
        await conn.execute(SCHEMA_V24_SQL)
        await conn.execute(SCHEMA_V25_SQL)
-    logger.info("Database schema initialized (v1-v25)")
+        await conn.execute(SCHEMA_V26_SQL)
+    logger.info("Database schema initialized (v1-v26)")


 async def init_schema() -> None:
@@ -2241,6 +2265,70 @@ async def get_recent_decision_lessons(limit: int = 15, practice_area: str = "")
    return [dict(r) for r in rows]


+async def create_draft_final_pair(case_id: UUID, draft_text: str, final_path: str = "") -> str:
+    """Capture the draft↔final pairing at mark-final (T5 / INV-LRN4). Immutable draft
+    snapshot; final_text/diff_stats/analysis filled later by the curator distillation."""
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        row = await conn.fetchrow(
+            """INSERT INTO draft_final_pairs (case_id, draft_text, final_path, status)
+               VALUES ($1, $2, $3, 'final_received') RETURNING id""",
+            case_id, draft_text, final_path,
+        )
+    return str(row["id"])
+
+
+async def update_draft_final_pair(
+    pair_id: UUID,
+    final_text: str | None = None,
+    diff_stats: dict | None = None,
+    analysis: dict | None = None,
+    status: str | None = None,
+) -> None:
+    """Advance a pairing row (curator distillation): final_text → diff_stats → analysis → status."""
+    sets, params, idx = [], [], 1
+    if final_text is not None:
+        sets.append(f"final_text = ${idx}"); params.append(final_text); idx += 1
+    if diff_stats is not None:
+        sets.append(f"diff_stats = ${idx}::jsonb"); params.append(json.dumps(diff_stats, ensure_ascii=False)); idx += 1
+    if analysis is not None:
+        sets.append(f"analysis = ${idx}::jsonb"); params.append(json.dumps(analysis, ensure_ascii=False)); idx += 1
+    if status is not None:
+        sets.append(f"status = ${idx}"); params.append(status); idx += 1
+    if not sets:
+        return
+    sets.append("updated_at = now()")
+    params.append(pair_id)
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        await conn.execute(
+            f"UPDATE draft_final_pairs SET {', '.join(sets)} WHERE id = ${idx}", *params,
+        )
+
+
+async def list_draft_final_pairs(status: str | None = None, limit: int = 200) -> list[dict]:
+    """Reconciliation ledger: all decisions paired with their final + status."""
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        if status:
+            rows = await conn.fetch(
+                """SELECT p.id, p.case_id, c.case_number, c.title, p.status,
+                          p.diff_stats, p.created_at, p.updated_at
+                   FROM draft_final_pairs p LEFT JOIN cases c ON c.id = p.case_id
+                   WHERE p.status = $1 ORDER BY p.created_at DESC LIMIT $2""",
+                status, limit,
+            )
+        else:
+            rows = await conn.fetch(
+                """SELECT p.id, p.case_id, c.case_number, c.title, p.status,
+                          p.diff_stats, p.created_at, p.updated_at
+                   FROM draft_final_pairs p LEFT JOIN cases c ON c.id = p.case_id
+                   ORDER BY p.created_at DESC LIMIT $1""",
+                limit,
+            )
+    return [dict(r) for r in rows]
+
+
 async def upsert_style_pattern(
    pattern_type: str,
    pattern_text: str,
--- a/mcp-server/src/legal_mcp/services/learning_loop.py
+++ b/mcp-server/src/legal_mcp/services/learning_loop.py
@@ -51,26 +51,25 @@ def compute_diff_stats(draft_text: str, final_text: str) -> dict:
    }


-LESSONS_PROMPT = """אתה מנתח שינויים בהחלטות משפטיות. קיבלת טיוטה (שנוצרה ע"י AI) וגרסה סופית (שעברה עריכת דפנה).
+LESSONS_PROMPT = """אתה מנתח את הפער בין טיוטה (AI) לגרסה סופית שדפנה תמיר חתמה, כדי ללמוד **איך דפנה כותבת ומנתחת** — לא את ההלכה הספציפית.
+
+## הבחנה קריטית (INV-LRN5 — טוהר-הקול):
+לכל שינוי קבע `domain`:
+- **style_method** — *איך* דפנה כותבת/חושבת: ניסוח, קצב, מבנה, תנועות-הנמקה, ביטויי-מעבר, טון, סדר-טיפול. **זה מה שלומדים** (ניתן להכללה לכל תיק).
+- **substance** — תוכן ספציפי-לתיק: הלכה, עובדה, תקדים, מספר. **לא לומדים** (לא ניתן לגרור לתיק אחר).

 ## משימה:
-1. זהה את השינויים המהותיים (לא הקלדה/פורמט)
-2. סווג כל שינוי:
-   - expression_change — ביטוי שהוחלף (הצע כלקח לעתיד)
-   - structure_change — שינוי מבני (סדר, חלוקה)
-   - content_addition — תוכן שנוסף (מה חסר?)
-   - content_removal — תוכן שהוסר (מה מיותר?)
-   - tone_change — שינוי טון (רשמי יותר/פחות)
-   - error_fix — תיקון שגיאה עובדתית/משפטית
-3. הסק לקחים שניתן להפעיל בהחלטות עתידיות
+1. זהה שינויים מהותיים (לא הקלדה/פורמט/מספור-אוטומטי).
+2. לכל שינוי: `type` (expression_change / structure_change / content_addition / content_removal / tone_change / reasoning_move / error_fix) + `domain` (style_method / substance).
+3. הסק לקח **מופשט** (על השיטה/הקול, לא על התוכן) — רק עבור style_method.

 ## פלט JSON:
 {
  "changes": [
-    {"type": "...", "description": "תיאור השינוי", "draft_text": "...", "final_text": "...", "lesson": "לקח לעתיד"}
+    {"type": "...", "domain": "style_method|substance", "block": "block-yod", "description": "...", "draft_text": "...", "final_text": "...", "lesson": "לקח מופשט (style_method בלבד)"}
  ],
-  "new_expressions": ["ביטוי חדש שדפנה הוסיפה"],
-  "overall_assessment": "הערכה כללית (1-2 משפטים)"
+  "new_expressions": ["ביטוי-מעבר/נוסחה חדשים (style_method בלבד — לא הלכות)"],
+  "overall_assessment": "1-2 משפטים"
 }
 """

@@ -114,42 +113,53 @@ async def process_final_version(
    if not decision:
        raise ValueError(f"No decision for case {case_id}")

-    # Get draft text (combine all blocks)
+    # Prefer the immutable snapshot captured at mark-final (T5/INV-LRN4); fall back
+    # to the live blocks (which may have been edited after sign-off).
    pool = await db.get_pool()
+    pair_id = None
+    draft_text = ""
    async with pool.acquire() as conn:
-        rows = await conn.fetch(
-            """SELECT content FROM decision_blocks
-               WHERE decision_id = $1 AND word_count > 0
-               ORDER BY block_index""",
-            UUID(decision["id"]),
+        pair = await conn.fetchrow(
+            """SELECT id, draft_text FROM draft_final_pairs
+               WHERE case_id = $1 AND status = 'final_received'
+               ORDER BY created_at DESC LIMIT 1""",
+            case_id,
        )
-    draft_text = "\n\n".join(r["content"] for r in rows if r["content"])
+        if pair:
+            pair_id = pair["id"]
+            draft_text = pair["draft_text"] or ""
+        if not draft_text:
+            rows = await conn.fetch(
+                """SELECT content FROM decision_blocks
+                   WHERE decision_id = $1 AND word_count > 0
+                   ORDER BY block_index""",
+                UUID(decision["id"]),
+            )
+            draft_text = "\n\n".join(r["content"] for r in rows if r["content"])

    if not draft_text:
        raise ValueError("No draft content to compare")

-    # Compute stats
+    # Compute stats (pure) + AI distillation (style/method vs substance)
    diff_stats = compute_diff_stats(draft_text, final_text)
-
-    # Analyze changes with AI
    analysis = await analyze_changes(draft_text, final_text)

-    # Store new expressions as style patterns
-    for expr in analysis.get("new_expressions", []):
-        if expr and len(expr) > 3:
-            await db.upsert_style_pattern(
-                pattern_type="characteristic_phrase",
-                pattern_text=expr,
-                context="למד מגרסה סופית",
-            )
+    # INV-LRN1: do NOT auto-commit learnings into writer-consumed channels.
+    # The distillation is a PROPOSAL stored on the pair; the chair/curator approves
+    # it (→ decision_lessons / appeal_type_rules, surfaced by T15) via the gate.
+    # (Previously this auto-upserted every new_expression as a style_pattern —
+    # that both bypassed the gate and contaminated style with substance. Removed.)
+    if pair_id is not None:
+        await db.update_draft_final_pair(
+            UUID(str(pair_id)),
+            final_text=final_text,
+            diff_stats=diff_stats,
+            analysis=analysis,
+            status="analyzed",
+        )

-    # Update decision status
-    await db.update_decision(
-        UUID(decision["id"]),
-        status="final",
-    )
-
-    # Update case status
+    # Update decision + case status
+    await db.update_decision(UUID(decision["id"]), status="final")
    case = await db.get_case(case_id)
    if case:
        await db.update_case(case_id, status="final")
@@ -157,6 +167,7 @@ async def process_final_version(
    return {
        "diff_stats": diff_stats,
        "analysis": analysis,
+        "pair_id": str(pair_id) if pair_id else None,
        "lessons_count": len(analysis.get("changes", [])),
        "new_expressions": len(analysis.get("new_expressions", [])),
    }