feat(style-acq T0): הזרקת פרופיל-הקול לכותב + מדיניות-העתקה + הפרדת דוגמאות↔פסיקה

הלוֹבר הראשי של מערכת רכישת-הסגנון. block_writer עבר היום מ"העתקה + ערבוב-מהות" ל"הכללת-סגנון + הפרדה": - _build_style_context: טוען את daphna-voice-fingerprint.md (פרופיל-הקול המופשט — המנגנון המרכזי) + מדיניות-העתקה מפורשת לפי סוג-תוכן (נוסחה→מותר, ניתוח→הכלל, מהות מתיק אחר→אסור). INV-LRN5. - _build_precedents_context: פוצל לשני זרמים נפרדים — daphna_style_exemplars (איך דפנה כותבת) מול case_law_citations (מהות לציטוט). - block-yod prompt: שני סעיפים מסומנים במקום "פסיקה רלוונטית (צטט מכאן)" שערבב סגנון ומהות; הדוגמאות-סגנוניות מתויגות "מבנה/קול בלבד". INV: G11 (סגנון דפנה), INV-LRN5 (טוהר-הקול). חלק מתוכנית style-acquisition. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 16:20:24 +00:00
parent 9c77123fa3
commit 8a3bcd3ffc
1 changed files with 81 additions and 20 deletions
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -248,8 +248,12 @@ BLOCK_PROMPTS = {
 ## חומרי מקור:
 {source_context}

-## פסיקה רלוונטית (צטט מכאן ומהידע הכללי שלך):
-{precedents_context}
+## דוגמאות-סגנון מהחלטות דפנה — מבנה וקול בלבד:
+⚠️ אלה דוגמאות ל**איך** דפנה כותבת (מבנה, קצב, תנועות-הנמקה, ביטויים) — **לא מקור-תוכן**. הכלל המבחין: נוסחה/בוילרפלייט קבוע (פתיח דוקטרינלי, תבנית-סיום) → מותר להעתיק; ניתוח/טענות ספציפיים → **הכלל את הדפוס והתאם לתיק שלפניך**, אל תעתיק; מהות משפטית (הלכה/עובדה) מתיק אחר → **אסור** להעתיק.
+{daphna_style_exemplars}
+
+## פסיקה רלוונטית לציטוט (צטט מכאן ומהידע הכללי שלך):
+{case_law_citations}

 ## סגנון דפנה:
 {style_context}""",
@@ -327,7 +331,9 @@ async def write_block(
    claims_context = await _build_claims_context(case_id)
    direction_context = _build_direction_context(decision)
    plans_context = await _build_plans_context(case_id)
-    precedents_context, _precedent_case_law_ids = await _build_precedents_context(case_id, block_id)
+    daphna_style_exemplars, case_law_citations, _precedent_case_law_ids = (
+        await _build_precedents_context(case_id, block_id)
+    )
    style_context = await _build_style_context()
    discussion_context = await _build_previous_blocks_context(case_id, decision)
    appraiser_facts_context = await _build_appraiser_facts_context(case_id)
@@ -363,7 +369,8 @@ async def write_block(
        claims_context=claims_context,
        direction_context=direction_context,
        plans_context=plans_context,
-        precedents_context=precedents_context,
+        daphna_style_exemplars=daphna_style_exemplars,
+        case_law_citations=case_law_citations,
        style_context=style_context,
        discussion_context=discussion_context,
        structure_guidance=structure_guidance,
@@ -707,9 +714,16 @@ async def _build_post_hearing_context(case_id: UUID) -> str:
    return "\n".join(lines)


-async def _build_precedents_context(case_id: UUID, block_id: str) -> tuple[str, list[str]]:
-    """Search for similar precedent paragraphs from other decisions and case law."""
-    parts = []
+async def _build_precedents_context(
+    case_id: UUID, block_id: str,
+) -> tuple[str, str, list[str]]:
+    """Two SEPARATE streams (INV-LRN5 — keep style apart from substance):
+    1. style_exemplars — Dafna's own block-level paragraphs (HOW she writes; structure/voice).
+    2. case_law_citations — precedent case-law (substantive material to quote).
+    Returns (style_exemplars, case_law_citations, case_law_ids).
+    """
+    style_parts: list[str] = []
+    caselaw_parts: list[str] = []
    case_law_ids: list[str] = []
    try:
        case = await db.get_case(case_id)
@@ -718,19 +732,18 @@ async def _build_precedents_context(case_id: UUID, block_id: str) -> tuple[str,
        query = f"דיון משפטי בנושא {subject}" if subject else "דיון משפטי ועדת ערר"
        query_emb = await embeddings.embed_query(query)

-        # Search 1: paragraph_embeddings (from other decisions by Dafna)
+        # Stream 1: paragraph_embeddings — Dafna's own prose (STYLE exemplars, not content)
        para_results = await db.search_similar_paragraphs(
            query_embedding=query_emb, limit=10, block_type="block-yod",
        )
-        # Filter out same case
        para_results = [r for r in para_results if r.get("case_number", "") != case_number]
        for r in para_results[:4]:
-            parts.append(
-                f"[החלטת {r.get('case_number', '?')} — {r.get('case_title', '')}, "
-                f"בלוק {r.get('block_type', '')}]\n{r['content'][:500]}"
+            style_parts.append(
+                f"[דוגמת-סגנון — החלטת {r.get('case_number', '?')} "
+                f"{r.get('case_title', '')}, בלוק {r.get('block_type', '')}]\n{r['content'][:500]}"
            )

-        # Search 2: case_law_embeddings (precedent case law)
+        # Stream 2: case_law_embeddings — substantive precedent (citations)
        pool = await db.get_pool()
        async with pool.acquire() as conn:
            caselaw_rows = await conn.fetch(
@@ -746,7 +759,7 @@ async def _build_precedents_context(case_id: UUID, block_id: str) -> tuple[str,
            case_law_ids.append(str(r["id"]))
            text = r["key_quote"] or r["summary"] or ""
            if text:
-                parts.append(
+                caselaw_parts.append(
                    f"[פסיקה: {r['case_number']} {r['case_name']} ({r.get('court', '')})] "
                    f"score={r['score']:.3f}\n{text[:400]}"
                )
@@ -754,16 +767,60 @@ async def _build_precedents_context(case_id: UUID, block_id: str) -> tuple[str,
    except Exception as e:
        logger.warning("Failed to fetch precedents: %s", e)

-    return ("\n\n".join(parts) if parts else "(אין תקדימים)"), case_law_ids
+    return (
+        "\n\n".join(style_parts) if style_parts else "(אין דוגמאות-סגנון)",
+        "\n\n".join(caselaw_parts) if caselaw_parts else "(אין פסיקה רלוונטית)",
+        case_law_ids,
+    )
+
+
+# Cache for the abstract voice profile (read once per process).
+_VOICE_FINGERPRINT_CACHE: str | None = None
+
+# Style-acquisition policy (INV-LRN5): how to USE the style material below.
+_COPY_POLICY = """## מדיניות-סגנון (איך להשתמש בחומר שלהלן) — חובה:
+**היעד: לכתוב בקול ובשיטה של דפנה — לא להעתיק.** הפרופיל שלהלן הוא ההכללה של *איך* דפנה כותבת; הַחֵל אותו על העובדות של התיק שלפניך.
+- **תוכן קבוע/נוסחאי** (פתיח דוקטרינלי, תבנית-סיום, ביטויי-מעבר) → מותר להשתמש כלשונו.
+- **ניתוח/טענות ספציפיים** → הכלל את הדפוס והתאם לתיק; אל תעתיק ניסוח מתיק אחר.
+- **מהות משפטית (הלכה/עובדה/תקדים) מתיק אחר** → אסור לגרור לכאן; המהות באה מחומרי-המקור והפסיקה של *התיק הזה* בלבד.
+"""
+
+
+def _load_voice_fingerprint() -> str:
+    """Load the abstract authorial-style profile (daphna-voice-fingerprint.md).
+
+    This is the PRIMARY style channel (Authorial Style Profiling): the generalized
+    'how Dafna writes', injected so the writer adapts it rather than copying exemplars.
+    Read-only consumption of a learning artifact (Writing↔Acquisition separation).
+    """
+    global _VOICE_FINGERPRINT_CACHE
+    if _VOICE_FINGERPRINT_CACHE is not None:  # "" (a cached failed load) also short-circuits here
+        return _VOICE_FINGERPRINT_CACHE
+    try:
+        path = config.DATA_DIR.parent / "docs" / "daphna-voice-fingerprint.md"
+        _VOICE_FINGERPRINT_CACHE = path.read_text(encoding="utf-8")
+    except Exception as e:  # best-effort: log and fall back to an empty profile instead of raising
+        logger.warning("voice-fingerprint not loaded: %s", e)
+        _VOICE_FINGERPRINT_CACHE = ""  # failure is cached too, so the read is not retried in this process
+    return _VOICE_FINGERPRINT_CACHE


 async def _build_style_context() -> str:
-    """Build comprehensive style guide from DB patterns + SKILL.md rules.
+    """Build comprehensive style guide: abstract voice profile (primary) +
+    SKILL.md rules + DB patterns.

-    Per Anthropic: explicit style instructions reduce generic output.
+    Per Anthropic: explicit style instructions reduce generic output. The voice
+    fingerprint is the primary abstract-profile channel (T0 / INV-LRN4-5).
    """
    lines = []

+    # Copy-policy first, then the abstract voice profile (the PRIMARY channel).
+    lines.append(_COPY_POLICY)
+    fingerprint = _load_voice_fingerprint()
+    if fingerprint:
+        lines.append("## פרופיל-הקול של דפנה (טביעת-אצבע — המנגנון המרכזי):\n")
+        lines.append(fingerprint)
+
    # Core style rules (from SKILL.md analysis)
    lines.append("""## כללי סגנון דפנה תמיר — חובה:

@@ -882,7 +939,9 @@ async def get_block_context(case_id: UUID, block_id: str, instructions: str = ""
    claims_context = await _build_claims_context(case_id)
    direction_context = _build_direction_context(decision)
    plans_context = await _build_plans_context(case_id)
-    precedents_context, _ = await _build_precedents_context(case_id, block_id)
+    daphna_style_exemplars, case_law_citations, _ = (
+        await _build_precedents_context(case_id, block_id)
+    )
    style_context = await _build_style_context()
    discussion_context = await _build_previous_blocks_context(case_id, decision)
    appraiser_facts_context = await _build_appraiser_facts_context(case_id)
@@ -914,7 +973,8 @@ async def get_block_context(case_id: UUID, block_id: str, instructions: str = ""
        claims_context=claims_context,
        direction_context=direction_context,
        plans_context=plans_context,
-        precedents_context=precedents_context,
+        daphna_style_exemplars=daphna_style_exemplars,
+        case_law_citations=case_law_citations,
        style_context=style_context,
        discussion_context=discussion_context,
        structure_guidance=structure_guidance,
@@ -942,7 +1002,8 @@ async def get_block_context(case_id: UUID, block_id: str, instructions: str = ""
        "source_documents": source_context,
        "claims": claims_context,
        "direction": direction_context,
-        "precedents": precedents_context,
+        "precedents": case_law_citations,
+        "style_exemplars": daphna_style_exemplars,
        "style_guide": style_context,
        "previous_blocks": discussion_context,
    }