diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py
index 81bfbd7..0e35bee 100644
--- a/mcp-server/src/legal_mcp/server.py
+++ b/mcp-server/src/legal_mcp/server.py
@@ -528,6 +528,24 @@ async def get_style_guide() -> str:
     return await drafting.get_style_guide()
 
 
+@mcp.tool()
+async def style_distance(case_number: str) -> str:
+    """מדד מרחק-סגנון (T7) — האם הטיוטה מתכנסת לסגנון דפנה: סטיית יחסי-זהב,
+    ספירת אנטי-דפוסים, ושיעור-השינוי draft→final מפנקס-ההתאמה. ללא LLM."""
+    # NOTE(review): the Hebrew docstring is kept verbatim — it is presumably what the
+    # MCP framework publishes as this tool's description; confirm before translating.
+    # In short: returns the T7 style-distance report for the case (computed by
+    # services.style_distance, no LLM) as indented JSON with non-ASCII text unescaped.
+    import json as _json
+    from legal_mcp.services import style_distance as _sd
+
+    return _json.dumps(
+        await _sd.style_distance(case_number),
+        ensure_ascii=False,
+        indent=2,
+    )
+
+
 @mcp.tool()
 async def draft_section(
     case_number: str,
diff --git a/mcp-server/src/legal_mcp/services/lessons.py b/mcp-server/src/legal_mcp/services/lessons.py
index df8f949..a91c4d3 100644
--- a/mcp-server/src/legal_mcp/services/lessons.py
+++ b/mcp-server/src/legal_mcp/services/lessons.py
@@ -42,6 +42,25 @@ GOLDEN_RATIOS: dict[str, dict[str, tuple[int, int]]] = {
     "partial_acceptance": {"background": (25, 35), "claims": (25, 30), "discussion": (40, 47), "summary": (2, 3)},
 }
 
+# ── Anti-patterns (what Dafna avoids) — detectable signals for style-distance (T7) ──
+# Derived from daphna-voice-fingerprint.md §3 (corrected 2026-06-06: sequential
+# paragraph numbering is REQUIRED — applied as Word auto-numbering at export — so the
+# anti-pattern is MANUAL numbers typed as text, not numbering itself).
+ANTI_PATTERNS: list[dict] = [
+    {"name": "manual_paragraph_numbers",
+     "regex": r"(?m)^\s*\d{1,3}\.\s",
+     "note": "מספרים ידניים כטקסט בראש פסקה — אמורים להיות auto-numbering בייצוא"},
+    {"name": "inline_numbered_fragments",
+     "regex": r"\([0-9]\)[^\n]{0,200}\([0-9]\)",
+     "note": "פיצול טיעון לרשימת-מיני (1)...(2) בתוך פסקת-אנליזה"},
+    {"name": "markdown_headers",
+     "regex": r"(?m)^#{1,6}\s",
+     "note": "כותרות markdown — אינן בהחלטה הסופית"},
+    {"name": "bullet_lists",
+     "regex": r"(?m)^\s*[-*•]\s",
+     "note": "רשימות תבליטים באנליזה — דפנה כותבת נרטיב רציף"},
+]
+
 # ── Paragraph length guidance (word counts) ────────────────────────
 
 PARAGRAPH_LENGTHS = {
diff --git a/mcp-server/src/legal_mcp/services/style_distance.py b/mcp-server/src/legal_mcp/services/style_distance.py
new file mode 100644
index 0000000..53f9e65
--- /dev/null
+++ b/mcp-server/src/legal_mcp/services/style_distance.py
@@ -0,0 +1,179 @@
+"""Style-distance metric (T7): are drafts converging on Dafna's style over time?
+
+Three components, all computed without an LLM (deterministic, cheap):
+1. golden_ratio_adherence — deviation of section percentages from GOLDEN_RATIOS, by outcome.
+2. anti_pattern_hits — count of anti-patterns (from lessons.ANTI_PATTERNS) in the draft text.
+3. draft_to_final_diff — change_percent from draft_final_pairs (the lower it gets → converging).
+
+This is a meta-signal on learning health (INV-LRN4) — consumed by a dashboard / QA, not the writer.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from uuid import UUID
+
+from legal_mcp.services import db
+from legal_mcp.services.lessons import ANTI_PATTERNS, GOLDEN_RATIOS, canonical_outcome
+
+logger = logging.getLogger(__name__)
+
+# block_id → golden-ratio section
+_BLOCK_TO_SECTION = {
+    "block-vav": "background",
+    "block-zayin": "claims",
+    "block-yod": "discussion",
+    "block-yod-alef": "summary",
+}
+
+# Outcome used when the case has no decision row or the decision's outcome is empty.
+_DEFAULT_OUTCOME = "rejection"
+
+
+def count_anti_patterns(text: str) -> dict:
+    """Count matches of every lessons.ANTI_PATTERNS regex in ``text``.
+
+    Lower = closer to Dafna.
+
+    Args:
+        text: Draft text to scan; a falsy value (``None``, ``""``) is scanned as ``""``.
+
+    Returns:
+        ``{"total": int, "by_pattern": {name: {"count": int, "note": str}}}``.
+        Patterns with no match are left out of ``by_pattern``.
+    """
+    haystack = text or ""
+    hits = {}
+    total = 0
+    for ap in ANTI_PATTERNS:
+        n = len(re.findall(ap["regex"], haystack))
+        if n:
+            hits[ap["name"]] = {"count": n, "note": ap["note"]}
+            total += n
+    return {"total": total, "by_pattern": hits}
+
+
+def golden_ratio_adherence(block_word_counts: dict[str, int], outcome: str) -> dict:
+    """Compare each section's share of the word total with its GOLDEN_RATIOS range.
+
+    Args:
+        block_word_counts: Word count per block_id; a ``None`` count is treated as 0.
+        outcome: Decision outcome, normalised through ``canonical_outcome`` before lookup.
+
+    Returns:
+        Dict with keys ``outcome``, ``total_words``, ``sections``, ``max_deviation``.
+        Each ``sections`` entry holds ``actual_pct``, ``target`` ([lo, hi]) and
+        ``deviation_pp`` (percentage points outside the range; 0.0 ⇒ within range).
+        When the outcome has no targets or the word total is 0, ``sections`` is
+        empty and ``max_deviation`` is ``None``.
+    """
+    outcome = canonical_outcome(outcome)
+    targets = GOLDEN_RATIOS.get(outcome)
+    # The denominator is every block supplied, not only those in _BLOCK_TO_SECTION.
+    total = sum(count or 0 for count in block_word_counts.values())
+    if not targets or total == 0:
+        return {"outcome": outcome, "total_words": total, "sections": {}, "max_deviation": None}
+
+    sections = {}
+    max_dev = 0.0
+    for block_id, section in _BLOCK_TO_SECTION.items():
+        if section not in targets:
+            continue
+        pct = round((block_word_counts.get(block_id) or 0) / total * 100, 1)
+        lo, hi = targets[section]
+        # Distance to the nearest bound of [lo, hi]; zero when pct is inside it.
+        dev = round(max(lo - pct, pct - hi, 0.0), 1)
+        max_dev = max(max_dev, dev)
+        sections[section] = {"actual_pct": pct, "target": [lo, hi], "deviation_pp": dev}
+    return {"outcome": outcome, "total_words": total, "sections": sections, "max_deviation": max_dev}
+
+
+def _as_uuid(value: object) -> UUID:
+    """Return ``value`` as a UUID, accepting either a UUID instance or its string form."""
+    return value if isinstance(value, UUID) else UUID(str(value))
+
+
+def _parse_diff_stats(raw: object) -> dict | None:
+    """Return ``diff_stats`` as a dict, or ``None`` when it is empty, malformed or not an object."""
+    if not raw:
+        return None
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError:
+            # Best-effort: a corrupt snapshot must not fail the whole report, but leave a trace.
+            logger.warning("style_distance: diff_stats is not valid JSON; ignoring it")
+            return None
+    if not isinstance(raw, dict):
+        # Callers read keys with .get(); anything but a mapping would raise there.
+        logger.warning(
+            "style_distance: diff_stats is %s, expected an object; ignoring it",
+            type(raw).__name__,
+        )
+        return None
+    return raw
+
+
+async def style_distance(case_number: str) -> dict:
+    """Assemble the 3 style-distance components for one case (T7).
+
+    Args:
+        case_number: Case number passed to ``db.get_case_by_number``.
+
+    Returns:
+        ``{"error": ...}`` when the case is not found; otherwise a dict with
+        ``case_number``, ``outcome``, ``golden_ratio_adherence``, ``anti_pattern_hits``,
+        ``draft_to_final_diff``, ``pair_status`` and ``summary`` (the three headline
+        numbers: max ratio deviation, anti-pattern total, change_percent).
+    """
+    case = await db.get_case_by_number(case_number)
+    if not case:
+        return {"error": f"case {case_number} not found"}
+    case_id = _as_uuid(case["id"])
+    decision = await db.get_decision_by_case(case_id)
+    # `or` (not a .get default) so a present-but-empty outcome also falls back.
+    outcome = (decision or {}).get("outcome") or _DEFAULT_OUTCOME
+
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        block_rows = []
+        if decision:
+            block_rows = await conn.fetch(
+                "SELECT block_id, content, word_count FROM decision_blocks "
+                "WHERE decision_id = $1 ORDER BY block_index",
+                _as_uuid(decision["id"]),
+            )
+        pair = await conn.fetchrow(
+            "SELECT draft_text, diff_stats, status FROM draft_final_pairs "
+            "WHERE case_id = $1 ORDER BY created_at DESC LIMIT 1",
+            case_id,
+        )
+
+    # Prefer the immutable snapshot's draft text when present.
+    if pair and pair["draft_text"]:
+        draft_text = pair["draft_text"]
+    else:
+        draft_text = "\n\n".join(b["content"] for b in block_rows if b["content"])
+
+    # NOTE(review): ratios always use the current decision_blocks word counts, even when
+    # the anti-pattern scan runs on the snapshot text — confirm that is intended.
+    word_counts = {b["block_id"]: (b["word_count"] or 0) for b in block_rows}
+    ratios = golden_ratio_adherence(word_counts, outcome)
+    anti = count_anti_patterns(draft_text)
+    diff = _parse_diff_stats(pair["diff_stats"]) if pair else None
+
+    return {
+        "case_number": case_number,
+        "outcome": canonical_outcome(outcome),
+        "golden_ratio_adherence": ratios,
+        "anti_pattern_hits": anti,
+        "draft_to_final_diff": diff,
+        "pair_status": pair["status"] if pair else None,
+        "summary": {
+            "ratio_max_deviation_pp": ratios.get("max_deviation"),
+            "anti_pattern_total": anti["total"],
+            "change_percent": diff.get("change_percent") if diff else None,
+        },
+    }