Merge pull request 'feat(style-acq T7): מדד מרחק-סגנון — סוגר את ה-MVP' (#76) from worktree-style-acquisition-mvp into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 6m34s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 6m34s
This commit was merged in pull request #76.
This commit is contained in:
@@ -528,6 +528,16 @@ async def get_style_guide() -> str:
|
|||||||
return await drafting.get_style_guide()
|
return await drafting.get_style_guide()
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def style_distance(case_number: str) -> str:
    """Style-distance metric (T7): is the draft converging on Dafna's style?

    Reports golden-ratio deviation, the anti-pattern count, and the
    draft→final change rate taken from the draft/final pair ledger. No LLM.

    Args:
        case_number: Case number, forwarded unchanged to the style-distance
            service.

    Returns:
        The service result serialized as indented JSON, with non-ASCII
        characters left unescaped.
    """
    # Imported at call time; the service module shares this tool's name,
    # hence the alias.
    import json as _json
    from legal_mcp.services import style_distance as _sd

    payload = await _sd.style_distance(case_number)
    return _json.dumps(payload, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def draft_section(
|
async def draft_section(
|
||||||
case_number: str,
|
case_number: str,
|
||||||
|
|||||||
@@ -42,6 +42,25 @@ GOLDEN_RATIOS: dict[str, dict[str, tuple[int, int]]] = {
|
|||||||
"partial_acceptance": {"background": (25, 35), "claims": (25, 30), "discussion": (40, 47), "summary": (2, 3)},
|
"partial_acceptance": {"background": (25, 35), "claims": (25, 30), "discussion": (40, 47), "summary": (2, 3)},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── Anti-patterns (what Dafna avoids) — detectable signals for style-distance (T7) ──
# Derived from daphna-voice-fingerprint.md §3 (corrected 2026-06-06: sequential
# paragraph numbering is REQUIRED — applied as Word auto-numbering at export — so the
# anti-pattern is MANUAL numbers typed as text, not numbering itself).
#
# Each entry is a dict with three keys:
#   name  — identifier of the pattern
#   regex — regular-expression source; a leading "(?m)" makes "^" match at the
#           start of every line rather than only at the start of the text
#   note  — human-readable explanation of the pattern (in Hebrew)
ANTI_PATTERNS: list[dict] = [
    # Line start, optional whitespace, 1–3 digits, a period, then whitespace.
    {"name": "manual_paragraph_numbers",
     "regex": r"(?m)^\s*\d{1,3}\.\s",
     "note": "מספרים ידניים כטקסט בראש פסקה — אמורים להיות auto-numbering בייצוא"},
    # A single-digit "(d)" followed, within at most 200 non-newline characters,
    # by another single-digit "(d)".
    {"name": "inline_numbered_fragments",
     "regex": r"\([0-9]\)[^\n]{0,200}\([0-9]\)",
     "note": "פיצול טיעון לרשימת-מיני (1)...(2) בתוך פסקת-אנליזה"},
    # Line starting with one to six "#" characters followed by whitespace.
    {"name": "markdown_headers",
     "regex": r"(?m)^#{1,6}\s",
     "note": "כותרות markdown — אינן בהחלטה הסופית"},
    # Line start, optional whitespace, one of "-", "*" or "•", then whitespace.
    {"name": "bullet_lists",
     "regex": r"(?m)^\s*[-*•]\s",
     "note": "רשימות תבליטים באנליזה — דפנה כותבת נרטיב רציף"},
]
|
||||||
|
|
||||||
# ── Paragraph length guidance (word counts) ────────────────────────
|
# ── Paragraph length guidance (word counts) ────────────────────────
|
||||||
|
|
||||||
PARAGRAPH_LENGTHS = {
|
PARAGRAPH_LENGTHS = {
|
||||||
|
|||||||
126
mcp-server/src/legal_mcp/services/style_distance.py
Normal file
126
mcp-server/src/legal_mcp/services/style_distance.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
"""מדד מרחק-סגנון (T7) — האם הטיוטות מתכנסות לדפנה לאורך זמן.
|
||||||
|
|
||||||
|
שלושה רכיבים, כולם ללא LLM (דטרמיניסטי, זול):
|
||||||
|
1. golden_ratio_adherence — סטיית אחוזי-הסעיפים מ-GOLDEN_RATIOS לפי תוצאה.
|
||||||
|
2. anti_pattern_hits — ספירת אנטי-דפוסים (מ-lessons.ANTI_PATTERNS) בטקסט הטיוטה.
|
||||||
|
3. draft_to_final_diff — change_percent מ-draft_final_pairs (ככל שיורד → מתכנס).
|
||||||
|
|
||||||
|
זהו מטא-אות על בריאות-הלמידה (INV-LRN4) — נצרך ע"י לוח-מחוונים / QA, לא ע"י הכותב.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from legal_mcp.services import db
|
||||||
|
from legal_mcp.services.lessons import ANTI_PATTERNS, GOLDEN_RATIOS, canonical_outcome
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# block_id → golden-ratio section.
# Keys are decision block ids; values are the section names used as keys in the
# per-outcome GOLDEN_RATIOS targets. Only the blocks listed here get a
# per-section entry in golden_ratio_adherence().
_BLOCK_TO_SECTION = {
    "block-vav": "background",
    "block-zayin": "claims",
    "block-yod": "discussion",
    "block-yod-alef": "summary",
}
|
||||||
|
|
||||||
|
|
||||||
|
def count_anti_patterns(text: str, patterns: list[dict] | None = None) -> dict:
    """Count anti-pattern occurrences in ``text``. Lower = closer to Dafna.

    Args:
        text: Text to scan. ``None`` or an empty string is scanned as "".
        patterns: Optional list of pattern dicts, each with ``name``,
            ``regex`` and ``note`` keys. When omitted (``None``), the
            module-level ``ANTI_PATTERNS`` list is used. An explicit empty
            list means "no patterns" and yields a zero result.

    Returns:
        ``{"total": <sum of all match counts>, "by_pattern": {...}}`` where
        ``by_pattern`` maps a pattern name to ``{"count": n, "note": note}``
        and only contains patterns that matched at least once.
    """
    # `is None` (not truthiness) so that an explicit [] is honoured.
    if patterns is None:
        patterns = ANTI_PATTERNS

    haystack = text or ""  # normalised once instead of on every iteration
    hits = {}
    total = 0
    for ap in patterns:
        # findall returns one item per non-overlapping match.
        n = len(re.findall(ap["regex"], haystack))
        if n:
            hits[ap["name"]] = {"count": n, "note": ap["note"]}
            total += n
    return {"total": total, "by_pattern": hits}
|
||||||
|
|
||||||
|
|
||||||
|
def golden_ratio_adherence(block_word_counts: dict[str, int], outcome: str) -> dict:
    """Compare each section's share of the word total with its GOLDEN_RATIOS range.

    Args:
        block_word_counts: Word count per block id. The denominator is the sum
            of ALL values supplied, including blocks that are not listed in
            ``_BLOCK_TO_SECTION``.
        outcome: Decision outcome; passed through ``canonical_outcome`` before
            the target lookup.

    Returns:
        A dict with ``outcome`` (canonicalised), ``total_words``, ``sections``
        and ``max_deviation``. Each ``sections`` entry holds ``actual_pct``
        (rounded to one decimal), ``target`` as ``[lo, hi]`` and
        ``deviation_pp`` — the distance in percentage points outside the
        range, 0.0 when inside it. When there are no targets for the outcome
        or the word total is 0, ``sections`` is empty and ``max_deviation``
        is ``None``.
    """
    canonical = canonical_outcome(outcome)
    target_ranges = GOLDEN_RATIOS.get(canonical)
    total_words = sum(block_word_counts.values())

    report = {
        "outcome": canonical,
        "total_words": total_words,
        "sections": {},
        "max_deviation": None,
    }
    # Nothing to measure against, or nothing to measure.
    if total_words == 0 or not target_ranges:
        return report

    worst = 0.0
    for block_id, section in _BLOCK_TO_SECTION.items():
        if section not in target_ranges:
            continue

        # A block missing from the counts contributes 0 %.
        share = round(block_word_counts.get(block_id, 0) / total_words * 100, 1)
        low, high = target_ranges[section]

        if share < low:
            gap = round(low - share, 1)
        elif share > high:
            gap = round(share - high, 1)
        else:
            gap = 0.0

        if gap > worst:
            worst = gap
        report["sections"][section] = {
            "actual_pct": share,
            "target": [low, high],
            "deviation_pp": gap,
        }

    report["max_deviation"] = worst
    return report
|
||||||
|
|
||||||
|
|
||||||
|
async def style_distance(case_number: str) -> dict:
    """Assemble the three style-distance components for one case (T7).

    Looks up the case and its decision, reads the decision's blocks and the
    most recent ``draft_final_pairs`` row for the case, then combines:

    * ``golden_ratio_adherence`` — from the blocks' stored ``word_count``;
    * ``anti_pattern_hits`` — counted on the pair's ``draft_text`` when one
      exists, otherwise on the non-empty block contents joined by blank lines;
    * ``draft_to_final_diff`` — the pair's ``diff_stats`` (JSON-decoded when
      it arrives as a string).

    Args:
        case_number: Case number to look up.

    Returns:
        ``{"error": ...}`` when the case is not found. Otherwise a dict with
        ``case_number``, ``outcome``, ``golden_ratio_adherence``,
        ``anti_pattern_hits``, ``draft_to_final_diff``, ``pair_status`` and a
        ``summary`` of the three headline numbers.
    """
    # Function-scope import: only needed to decode a string-typed diff_stats.
    import json

    case = await db.get_case_by_number(case_number)
    if not case:
        return {"error": f"case {case_number} not found"}

    # str() first so the id may be either a string or an existing UUID object
    # (UUID() itself rejects a UUID instance).
    case_id = UUID(str(case["id"]))
    decision = await db.get_decision_by_case(case_id)
    # `or` rather than a .get() default, so an explicit None/empty outcome
    # also falls back to "rejection".
    outcome = (decision or {}).get("outcome") or "rejection"

    block_rows = []
    draft_text = ""
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        if decision:
            block_rows = await conn.fetch(
                "SELECT block_id, content, word_count FROM decision_blocks "
                "WHERE decision_id = $1 ORDER BY block_index",
                UUID(str(decision["id"])),
            )
            draft_text = "\n\n".join(b["content"] for b in block_rows if b["content"])
        pair = await conn.fetchrow(
            "SELECT draft_text, diff_stats, status FROM draft_final_pairs "
            "WHERE case_id = $1 ORDER BY created_at DESC LIMIT 1",
            case_id,
        )

    # Prefer the immutable snapshot's draft text when present.
    if pair and pair["draft_text"]:
        draft_text = pair["draft_text"]

    word_counts = {b["block_id"]: (b["word_count"] or 0) for b in block_rows}
    ratios = golden_ratio_adherence(word_counts, outcome)
    anti = count_anti_patterns(draft_text)

    diff = None
    if pair and pair["diff_stats"]:
        diff = pair["diff_stats"]
        if isinstance(diff, str):
            try:
                diff = json.loads(diff)
            except json.JSONDecodeError:
                # Report the bad row instead of hiding it; the metric is
                # still returned, just without the diff component.
                logger.warning("style_distance: malformed diff_stats for case %s", case_number)
                diff = None

    # Only a mapping can carry change_percent; any other decoded JSON value
    # (list, number, ...) simply has none.
    change_percent = diff.get("change_percent") if isinstance(diff, dict) else None

    return {
        "case_number": case_number,
        "outcome": canonical_outcome(outcome),
        "golden_ratio_adherence": ratios,
        "anti_pattern_hits": anti,
        "draft_to_final_diff": diff,
        "pair_status": pair["status"] if pair else None,
        "summary": {
            "ratio_max_deviation_pp": ratios.get("max_deviation"),
            "anti_pattern_total": anti["total"],
            "change_percent": change_percent,
        },
    }
|
||||||
Reference in New Issue
Block a user