Merge pull request 'feat(style-acq T7): מדד מרחק-סגנון — סוגר את ה-MVP' (#76) from worktree-style-acquisition-mvp into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 6m34s

This commit was merged in pull request #76.
This commit is contained in:
2026-06-06 17:33:50 +00:00
3 changed files with 155 additions and 0 deletions

View File

@@ -528,6 +528,16 @@ async def get_style_guide() -> str:
return await drafting.get_style_guide() return await drafting.get_style_guide()
@mcp.tool()
async def style_distance(case_number: str) -> str:
    """מדד מרחק-סגנון (T7) — האם הטיוטה מתכנסת לסגנון דפנה: סטיית יחסי-זהב,
    ספירת אנטי-דפוסים, ושיעור-השינוי draft→final מפנקס-ההתאמה. ללא LLM."""
    # NOTE(review): the docstring above is kept verbatim (Hebrew) because the
    # MCP decorator presumably publishes it as the tool description, which
    # would make it runtime text — confirm before translating it.
    # English gloss: style-distance metric (T7) — is the draft converging on
    # Dafna's style: golden-ratio deviation, anti-pattern count, and the
    # draft→final change rate taken from the pairing ledger. No LLM involved.
    #
    # Both imports stay function-local and aliased, so no local name collides
    # with this tool's own name (the service module is also `style_distance`).
    import json as _json_codec
    from legal_mcp.services import style_distance as _style_service

    # The service returns a plain dict; it is serialised as pretty-printed
    # JSON with non-ASCII (Hebrew) characters left unescaped.
    return _json_codec.dumps(
        await _style_service.style_distance(case_number),
        ensure_ascii=False,
        indent=2,
    )
@mcp.tool() @mcp.tool()
async def draft_section( async def draft_section(
case_number: str, case_number: str,

View File

@@ -42,6 +42,25 @@ GOLDEN_RATIOS: dict[str, dict[str, tuple[int, int]]] = {
"partial_acceptance": {"background": (25, 35), "claims": (25, 30), "discussion": (40, 47), "summary": (2, 3)}, "partial_acceptance": {"background": (25, 35), "claims": (25, 30), "discussion": (40, 47), "summary": (2, 3)},
} }
# ── Anti-patterns (what Dafna avoids) — detectable signals for style-distance (T7) ──
# Source: daphna-voice-fingerprint.md §3. Corrected 2026-06-06: sequential
# paragraph numbering is REQUIRED (it is applied as Word auto-numbering at
# export), so what counts as an anti-pattern is a number typed MANUALLY as
# text — not numbering as such.
#
# Every entry is a dict with three string keys, in this order:
#   name  — stable identifier used as the key when hits are reported
#   regex — pattern source; inline flags such as (?m) are part of the pattern
#   note  — human-readable (Hebrew) explanation shown next to the hit count
ANTI_PATTERNS: list[dict] = [
    {"name": name, "regex": regex, "note": note}
    for name, regex, note in (
        (
            # A line that opens with 1–3 digits, a dot and whitespace ("12. ").
            "manual_paragraph_numbers",
            r"(?m)^\s*\d{1,3}\.\s",
            "מספרים ידניים כטקסט בראש פסקה — אמורים להיות auto-numbering בייצוא",
        ),
        (
            # Two single-digit markers like (1) … (2) on one line, at most
            # 200 characters apart.
            "inline_numbered_fragments",
            r"\([0-9]\)[^\n]{0,200}\([0-9]\)",
            "פיצול טיעון לרשימת-מיני (1)...(2) בתוך פסקת-אנליזה",
        ),
        (
            # A line that opens with one to six '#' followed by whitespace.
            "markdown_headers",
            r"(?m)^#{1,6}\s",
            "כותרות markdown — אינן בהחלטה הסופית",
        ),
        (
            # A line that opens (after optional whitespace) with -, * or •
            # followed by whitespace.
            "bullet_lists",
            r"(?m)^\s*[-*•]\s",
            "רשימות תבליטים באנליזה — דפנה כותבת נרטיב רציף",
        ),
    )
]
# ── Paragraph length guidance (word counts) ──────────────────────── # ── Paragraph length guidance (word counts) ────────────────────────
PARAGRAPH_LENGTHS = { PARAGRAPH_LENGTHS = {

View File

@@ -0,0 +1,126 @@
"""מדד מרחק-סגנון (T7) — האם הטיוטות מתכנסות לדפנה לאורך זמן.
שלושה רכיבים, כולם ללא LLM (דטרמיניסטי, זול):
1. golden_ratio_adherence — סטיית אחוזי-הסעיפים מ-GOLDEN_RATIOS לפי תוצאה.
2. anti_pattern_hits — ספירת אנטי-דפוסים (מ-lessons.ANTI_PATTERNS) בטקסט הטיוטה.
3. draft_to_final_diff — change_percent מ-draft_final_pairs (ככל שיורד → מתכנס).
זהו מטא-אות על בריאות-הלמידה (INV-LRN4) — נצרך ע"י לוח-מחוונים / QA, לא ע"י הכותב.
"""
from __future__ import annotations
import logging
import re
from uuid import UUID
from legal_mcp.services import db
from legal_mcp.services.lessons import ANTI_PATTERNS, GOLDEN_RATIOS, canonical_outcome
logger = logging.getLogger(__name__)
# Decision block id → the golden-ratio section that block is measured as.
# Only the block ids listed here take part in the per-section ratio check.
# NOTE(review): the ids look like transliterated Hebrew letter labels
# (vav, zayin, yod, yod-alef) — confirm against the block schema.
_BLOCK_TO_SECTION: dict[str, str] = {
    "block-vav": "background",      # factual / procedural background
    "block-zayin": "claims",        # the parties' claims
    "block-yod": "discussion",      # discussion section
    "block-yod-alef": "summary",    # closing summary
}
def count_anti_patterns(text: str) -> dict:
    """Tally how often each ANTI_PATTERNS regex matches in *text*.

    A falsy *text* (``None`` or ``""``) is scanned as the empty string.
    Matches are counted the way ``re`` enumerates them: non-overlapping,
    left to right.

    Returns a dict with two keys:
      * ``"total"`` — sum of all match counts (lower means closer to the
        target style).
      * ``"by_pattern"`` — ``{name: {"count": n, "note": note}}``, holding
        only the patterns that matched at least once.
    """
    haystack = text or ""
    matched: dict = {}
    grand_total = 0
    for pattern in ANTI_PATTERNS:
        occurrences = sum(1 for _ in re.finditer(pattern["regex"], haystack))
        if occurrences == 0:
            # Patterns with no matches are left out of the report entirely.
            continue
        matched[pattern["name"]] = {"count": occurrences, "note": pattern["note"]}
        grand_total += occurrences
    return {"total": grand_total, "by_pattern": matched}
def golden_ratio_adherence(block_word_counts: dict[str, int], outcome: str) -> dict:
    """Compare each section's share of the words with its GOLDEN_RATIOS range.

    *outcome* is normalised through ``canonical_outcome`` and used to pick the
    target ranges. Each block listed in ``_BLOCK_TO_SECTION`` is expressed as
    a percentage (one decimal place) of the sum of ALL values in
    *block_word_counts*; a mapped block that is missing counts as 0 words.

    Returns a dict with keys ``outcome``, ``total_words``, ``sections`` and
    ``max_deviation``. Every ``sections`` entry holds ``actual_pct``,
    ``target`` (``[lo, hi]``) and ``deviation_pp`` — the distance in
    percentage points to the nearest edge of the range, ``0.0`` when inside
    it. If the outcome has no targets, or there are no words at all,
    ``sections`` is empty and ``max_deviation`` is ``None``.
    """
    outcome = canonical_outcome(outcome)
    targets = GOLDEN_RATIOS.get(outcome)
    total = sum(block_word_counts.values())

    report = {"outcome": outcome, "total_words": total, "sections": {}, "max_deviation": None}
    if total == 0 or not targets:
        # Nothing to measure against (or nothing to measure).
        return report

    worst = 0.0
    for block_id, section in _BLOCK_TO_SECTION.items():
        if section not in targets:
            continue
        share = round(block_word_counts.get(block_id, 0) / total * 100, 1)
        lo, hi = targets[section]
        # Distance to the nearest range edge; zero while inside [lo, hi].
        if share < lo:
            gap = lo - share
        elif share > hi:
            gap = share - hi
        else:
            gap = 0.0
        gap = round(gap, 1)
        if gap > worst:
            worst = gap
        report["sections"][section] = {
            "actual_pct": share,
            "target": [lo, hi],
            "deviation_pp": gap,
        }

    report["max_deviation"] = worst
    return report
def _coerce_diff_stats(raw: object) -> dict | None:
    """Return a stored ``diff_stats`` value as a dict, or None if unusable."""
    # Function-local import: json is not among the visible file-level imports.
    import json

    if isinstance(raw, str):
        # The column may arrive as JSON text; decode it. Malformed text is
        # treated as "no diff available" rather than failing the whole metric.
        try:
            raw = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return None
    # Valid JSON that is not an object (list, number, string, null) cannot
    # carry ``change_percent``; treat it as missing instead of crashing later.
    return raw if isinstance(raw, dict) else None


async def style_distance(case_number: str) -> dict:
    """Assemble the 3 style-distance components for one case (T7).

    Components:
      1. ``golden_ratio_adherence`` — section word-share vs. target ranges,
         computed from the decision's ``decision_blocks`` word counts.
      2. ``anti_pattern_hits`` — anti-pattern counts in the draft text.
      3. ``draft_to_final_diff`` — ``diff_stats`` of the most recent
         ``draft_final_pairs`` row for the case (dict, or None).

    Args:
        case_number: Case identifier passed to ``db.get_case_by_number``.

    Returns:
        ``{"error": ...}`` when the case is not found. Otherwise a dict with
        ``case_number``, ``outcome``, the three components, ``pair_status``
        and a flat ``summary`` (``ratio_max_deviation_pp``,
        ``anti_pattern_total``, ``change_percent``).
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return {"error": f"case {case_number} not found"}

    # NOTE(review): assumes the db helpers return ids as strings — confirm.
    case_id = UUID(case["id"])
    decision = await db.get_decision_by_case(case_id)
    # "rejection" is the fallback only when there is no decision (or the key
    # is absent); a present-but-None outcome is passed through unchanged.
    outcome = (decision or {}).get("outcome", "rejection")

    block_rows = []
    draft_text = ""
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        if decision:
            block_rows = await conn.fetch(
                "SELECT block_id, content, word_count FROM decision_blocks "
                "WHERE decision_id = $1 ORDER BY block_index",
                UUID(decision["id"]),
            )
            draft_text = "\n\n".join(b["content"] for b in block_rows if b["content"])
        pair = await conn.fetchrow(
            "SELECT draft_text, diff_stats, status FROM draft_final_pairs "
            "WHERE case_id = $1 ORDER BY created_at DESC LIMIT 1",
            case_id,
        )

    # Prefer the pair's stored draft text over the current block contents
    # when present. Word counts below still come from the current blocks.
    if pair and pair["draft_text"]:
        draft_text = pair["draft_text"]

    word_counts = {b["block_id"]: (b["word_count"] or 0) for b in block_rows}
    ratios = golden_ratio_adherence(word_counts, outcome)
    anti = count_anti_patterns(draft_text)

    diff = None
    if pair and pair["diff_stats"]:
        diff = _coerce_diff_stats(pair["diff_stats"])

    return {
        "case_number": case_number,
        "outcome": canonical_outcome(outcome),
        "golden_ratio_adherence": ratios,
        "anti_pattern_hits": anti,
        "draft_to_final_diff": diff,
        "pair_status": pair["status"] if pair else None,
        "summary": {
            "ratio_max_deviation_pp": ratios.get("max_deviation"),
            "anti_pattern_total": anti["total"],
            # diff is a dict or None here, so .get is always safe.
            "change_percent": diff.get("change_percent") if diff else None,
        },
    }