feat(#99 / T10): get_style_guide — יחסי-זהב נמדדים מהקורפוס לצד היעד

style_distance.measure_corpus_ratios(): מפצל כל החלטה ב-style_corpus לסעיפים (chunker) ומחשב ממוצע %-סעיף — אגרגט "_all" + פר-תוצאה (כשיש). cached. get_style_guide מציג שורת "נמדד בפועל" עם ⚠️ על פער מטווח-היעד. מצב נוכחי: style_corpus.outcome לא מאוכלס → מוצג אגרגט כל-ההחלטות (n=48: רקע 26.4% / טענות 9.7% / דיון 43.8% / סיכום 20.1%); פיצול לפי-תוצאה future-ready. המדידה גם מאירה מגבלות זיהוי-סעיפים (כוונת T10 — לסמן פער לבדיקה). חופף-חלקית ל-T7 שמודד adherence per-draft; זה מודד את הקורפוס. כשל מדידה מוצג, לא נבלע. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 21:01:42 +00:00
parent a571ad535b
commit e4651a9d06
2 changed files with 91 additions and 0 deletions
--- a/mcp-server/src/legal_mcp/services/style_distance.py
+++ b/mcp-server/src/legal_mcp/services/style_distance.py
@@ -27,6 +27,62 @@ _BLOCK_TO_SECTION = {
    "block-yod-alef": "summary",
 }

+# Maps chunker section_type → golden-ratio section (corpus measurement, T10); unlisted types are not counted.
+_CHUNK_SECTION_TO_GOLDEN = {
+    "facts": "background", "intro": "background",
+    "appellant_claims": "claims", "respondent_claims": "claims",
+    "legal_analysis": "discussion",
+    "conclusion": "summary", "ruling": "summary",
+}
+
+_CORPUS_RATIOS_CACHE: dict | None = None  # memo for measure_corpus_ratios(); None until first computed
+
+
+async def measure_corpus_ratios() -> dict:
+    """Measure ACTUAL section %-of-words from Dafna's style_corpus — the empirical
+    counterpart to lessons.GOLDEN_RATIOS (T10). Splits each decision via chunker (not
+    the filtered exemplars), then averages per-decision percentages per bucket. Cached
+    for the process. Returns {bucket: {"n": int, "sections": {sec: pct}}}; bucket is "_all" or an outcome."""
+    global _CORPUS_RATIOS_CACHE
+    if _CORPUS_RATIOS_CACHE is not None:
+        return _CORPUS_RATIOS_CACHE
+
+    from legal_mcp.services.chunker import _split_into_sections
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch("SELECT full_text, outcome FROM style_corpus WHERE full_text <> ''")
+
+    # Per-outcome AND an "_all" aggregate. style_corpus.outcome is currently
+    # unpopulated for the imported corpus, so per-outcome may be empty — "_all"
+    # is the meaningful signal today, and per-outcome becomes live once outcomes
+    # are backfilled. Callers see which buckets have data (and how much) via n.
+    by_outcome: dict[str, list[dict]] = {}
+    for r in rows:
+        sect_words: dict[str, int] = {}  # golden section → word count for this decision
+        for stype, stext in _split_into_sections(r["full_text"]):
+            g = _CHUNK_SECTION_TO_GOLDEN.get(stype)
+            if g:
+                sect_words[g] = sect_words.get(g, 0) + len(stext.split())
+        total = sum(sect_words.values())
+        if total < 100:  # under 100 mapped words: treated as unparsed; decision is left out of every bucket (and n)
+            continue
+        pct = {s: w / total * 100 for s, w in sect_words.items()}  # share of mapped words only
+        by_outcome.setdefault("_all", []).append(pct)
+        outcome = canonical_outcome(r["outcome"] or "")
+        if outcome:
+            by_outcome.setdefault(outcome, []).append(pct)
+
+    result: dict = {}
+    for outcome, decs in by_outcome.items():
+        avg = {}
+        for sec in ("background", "claims", "discussion", "summary"):
+            vals = [d.get(sec, 0.0) for d in decs]  # a section absent from a decision counts as 0%
+            if vals:  # buckets are only created on append, so decs is never empty here
+                avg[sec] = round(sum(vals) / len(vals), 1)
+        result[outcome] = {"n": len(decs), "sections": avg}
+    _CORPUS_RATIOS_CACHE = result
+    return result
+

 def count_anti_patterns(text: str) -> dict:
    """Count each anti-pattern occurrence in text. Lower = closer to Dafna."""
--- a/mcp-server/src/legal_mcp/tools/drafting.py
+++ b/mcp-server/src/legal_mcp/tools/drafting.py
@@ -170,6 +170,41 @@ async def get_style_guide() -> str:
    )
    result += "\n"

+    # T10 — measured-from-corpus ratios alongside the targets, ⚠️ flags a gap
+    # (actual average outside the target range → revisit the target or the corpus).
+    try:
+        from legal_mcp.services.style_distance import measure_corpus_ratios
+        measured = await measure_corpus_ratios()
+        if measured:
+            result += "### נמדד מהקורפוס בפועל (ממוצע) — ⚠️ = פער מהיעד\n\n"
+            result += "| קבוצה | רקע | טענות | דיון | סיכום |\n|---|------|-------|------|-------|\n"
+            # Per-outcome rows (flagged vs that outcome's target), when outcomes exist.
+            for outcome in VALID_OUTCOMES:
+                m = measured.get(outcome)
+                if not m:
+                    continue
+                tgt = GOLDEN_RATIOS[outcome]
+                cells = []
+                for sec in ("background", "claims", "discussion", "summary"):
+                    val = m["sections"].get(sec)
+                    if val is None:
+                        cells.append("—")
+                        continue
+                    lo, hi = tgt[sec]
+                    cells.append(f"{val}%" + ("" if lo <= val <= hi else " ⚠️"))
+                result += f"| {outcome_labels[outcome]} (n={m['n']}) | " + " | ".join(cells) + " |\n"
+            # "_all" aggregate — the meaningful row today (corpus outcome unpopulated);
+            # shown informationally (no single target to flag against).
+            allm = measured.get("_all")
+            if allm:
+                cells = [f"{allm['sections'].get(s, '—')}%" if allm['sections'].get(s) is not None else "—"
+                         for s in ("background", "claims", "discussion", "summary")]
+                result += f"| כל ההחלטות (n={allm['n']}) | " + " | ".join(cells) + " |\n"
+            result += ("\n_⚠️ = הממוצע בפועל חורג מטווח-היעד; שקול לעדכן יעד ב-/methodology או לבדוק את הקורפוס. "
+                       "פיצול לפי-תוצאה יופיע כש-`style_corpus.outcome` יאוכלס._\n\n")
+    except Exception as e:  # surfaced, not swallowed
+        result += f"_מדידת יחסי-זהב מהקורפוס נכשלה: {e}_\n\n"
+
    # Opening and summary strategies
    result += "## אסטרטגיות פתיחה וסיכום לפי תוצאה\n\n"
    for outcome in VALID_OUTCOMES: