Merge pull request 'feat(style-acq T15): הכותב צורך את כל הלמידה (/methodology + /training) + תיקון-מספור' (#72) from worktree-style-acquisition-mvp into main

2026-06-06 16:37:01 +00:00
parent f17e0e382a b9bdca0572
commit 014eb4937e
3 changed files with 87 additions and 8 deletions
--- a/docs/daphna-voice-fingerprint.md
+++ b/docs/daphna-voice-fingerprint.md
@@ -181,11 +181,12 @@

 מבוסס על קריאת ה-10 החלטות + ההשוואה לטיוטות ה-AI:

-### 3.1 ❌ אסור: רשימה ממוספרת בתוך פסקה
-**ב-0/33** מהחלטות הסופיות יש `(1) ... (2) ... (3) ...` בתוך פסקת אנליזה אחת.
-**ב-3/3 טיוטות AI** שראיתי הופיעה רשימה ממוספרת — שהוסרה בעריכה.
+### 3.1 ❌ אסור: רשימת-מיני ממוספרת בתוך פסקת-אנליזה (פיצול טיעון ל-`(1)...(2)...`)
+**ב-0/33** מההחלטות הסופיות יש `(1) ... (2) ... (3) ...` המפצל טיעון בתוך פסקת אנליזה אחת. טענות וניתוח נכתבים כ**נרטיב רציף** עם ביטויי-מעבר ("עוד נטען", "באשר ל-", "יתרה מכך"), לא כרשימת-מיני.

-⚠️ **הבחנה חשובה**: זה שונה ממספור פסקאות סדרתי (1, 2, 3 ... כאוטוט-של-פסקאות), שכן עד 2025 דפנה כן השתמשה במספור סדרתי (כמו פסיקה מסורתית). מ-2025-מאוחר זה נטוש; ההחלטות החדשות (1126-25, 1128-25, 1130-25, 1194-25) **ללא** מספור פסקאות. **המגמה החדשה** היא נרטיב רציף ללא מספור.
+✅ **ההחלטה כן ממוספרת — תמיד.** פסקאות ההחלטה ממוספרות סדרתית (1, 2, 3 ... עד הסוף), כמקובל בפסיקה. **המספור מוחל אוטומטית בשלב ייצוא ה-DOCX (Word auto-numbering)** ולכן אינו מופיע בחילוץ-טקסט גולמי — אל תסיק מהיעדרו בטקסט שאין מספור.
+⚠️ **הכותב לא יקליד מספרים כטקסט ידני** ("12. ", "13. ") בתוך התוכן — הם נוצרים אוטומטית בייצוא, ומספרים ידניים שוברים את ה-copy/paste ויוצרים מספור כפול. כתוב את הפסקה ללא מספר מוביל.
+**ב-3/3 טיוטות AI** הופיעו מספרים ידניים בטקסט — שהוסרו/הומרו לאוטומטיים בעריכה. (תיקון 2026-06-06: ההנחה הקודמת ש"ההחלטות החדשות ללא מספור" הייתה ארטיפקט-חילוץ.)

 ### 3.2 ⚠️ מותנה: כותרת משנה בלב בלוק י

--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -334,7 +334,7 @@ async def write_block(
    daphna_style_exemplars, case_law_citations, _precedent_case_law_ids = (
        await _build_precedents_context(case_id, block_id)
    )
-    style_context = await _build_style_context()
+    style_context = await _build_style_context(case.get("practice_area", ""))
    discussion_context = await _build_previous_blocks_context(case_id, decision)
    appraiser_facts_context = await _build_appraiser_facts_context(case_id)
    appraiser_conflicts_context = await _build_appraiser_conflicts_context(case_id)
@@ -805,12 +805,15 @@ def _load_voice_fingerprint() -> str:
    return _VOICE_FINGERPRINT_CACHE


-async def _build_style_context() -> str:
+async def _build_style_context(practice_area: str = "") -> str:
    """Build comprehensive style guide: abstract voice profile (primary) +
-    SKILL.md rules + DB patterns.
+    SKILL.md rules + DB patterns + accumulated chair learnings.

    Per Anthropic: explicit style instructions reduce generic output. The voice
    fingerprint is the primary abstract-profile channel (T0 / INV-LRN4-5).
+    Accumulated learnings (T15) — the chair's /methodology edits and /training
+    decision_lessons — are appended LAST and marked authoritative, so everything
+    we have learned to date reaches the writer (not just hardcoded defaults).
    """
    lines = []

@@ -878,6 +881,39 @@ async def _build_style_context() -> str:
                for item in items[:8]:
                    lines.append(f"- {item['pattern_text']}")

+    # ── למידה מצטברת (T15) — עריכות היו"ר ב-/methodology + לקחי /training ──
+    # גובר על ברירות-המחדל לעיל. כך כל מה שלמדנו עד היום מגיע לכותב.
+    learned: list[str] = []
+    try:
+        for cat, label in (
+            ("golden_ratios", "יחסי-זהב (אחוזי-סעיפים)"),
+            ("discussion_rules", "כללי-דיון"),
+            ("content_checklists", "צ׳קליסטים"),
+        ):
+            ov = await db.get_methodology_overrides(cat)
+            if ov:
+                learned.append(f"\n**{label} — ערכי היו\"ר (גוברים על ברירת-המחדל):**")
+                for k, v in ov.items():
+                    learned.append(f"- {k}: {json.dumps(v, ensure_ascii=False)}")
+    except Exception as e:
+        logger.warning("methodology overrides not loaded: %s", e)
+    try:
+        lessons = await db.get_recent_decision_lessons(limit=15, practice_area=practice_area)
+        if lessons:
+            learned.append("\n**לקחים מהחלטות קודמות (decision_lessons):**")
+            for ls in lessons:
+                src = ls.get("decision_number") or ls.get("source") or ""
+                learned.append(f"- [{ls.get('category', '')}] {ls['lesson_text']}" + (f" ({src})" if src else ""))
+    except Exception as e:
+        logger.warning("decision_lessons not loaded: %s", e)
+
+    if learned:
+        lines.append(
+            "\n## ⭐ למידה מצטברת — חובה, גובר על כל ברירת-מחדל לעיל "
+            "(עריכות היו\"ר ב-/methodology + לקחי /training):"
+        )
+        lines.extend(learned)
+
    return "\n".join(lines)


@@ -942,7 +978,7 @@ async def get_block_context(case_id: UUID, block_id: str, instructions: str = ""
    daphna_style_exemplars, case_law_citations, _ = (
        await _build_precedents_context(case_id, block_id)
    )
-    style_context = await _build_style_context()
+    style_context = await _build_style_context(case.get("practice_area", ""))
    discussion_context = await _build_previous_blocks_context(case_id, decision)
    appraiser_facts_context = await _build_appraiser_facts_context(case_id)
    appraiser_conflicts_context = await _build_appraiser_conflicts_context(case_id)
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -2199,6 +2199,48 @@ async def get_style_patterns(pattern_type: str | None = None) -> list[dict]:
    return [dict(r) for r in rows]


+async def get_methodology_overrides(category: str) -> dict:
+    """Return the '_global' appeal_type_rules rows of one rule_category as {rule_key: value}.
+    String rule_value payloads are JSON-decoded when possible and kept verbatim otherwise.
+    NOTE(review): said to override lessons.py defaults and to mirror GET
+    /api/methodology/{category} (T15 / INV-LRN4) -- neither is visible here; confirm."""
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            "SELECT rule_key, rule_value FROM appeal_type_rules "
+            "WHERE appeal_type = '_global' AND rule_category = $1",
+            category,
+        )
+    out: dict = {}
+    for r in rows:
+        raw = r["rule_value"]
+        if isinstance(raw, str):  # only string payloads are decoded; other types pass through
+            try:
+                raw = json.loads(raw)
+            except (json.JSONDecodeError, TypeError):
+                pass  # not valid JSON: keep the raw string as the value
+        out[r["rule_key"]] = raw
+    return out
+
+
+async def get_recent_decision_lessons(limit: int = 15, practice_area: str = "") -> list[dict]:
+    """Per-decision learnings the chair/curator attached in /training (decision_lessons),
+    so the writer consumes them too (T15). Prefers style/structure/lexicon, recent first."""
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            """SELECT dl.lesson_text, dl.category, dl.source,
+                      sc.decision_number, sc.practice_area
+               FROM decision_lessons dl
+               JOIN style_corpus sc ON sc.id = dl.style_corpus_id
+               WHERE ($2 = '' OR sc.practice_area = $2)
+               ORDER BY dl.created_at DESC
+               LIMIT $1""",
+            limit, practice_area,
+        )
+    return [dict(r) for r in rows]
+
+
 async def upsert_style_pattern(
    pattern_type: str,
    pattern_text: str,