# Translation table from the chunker's section_type labels to the four
# golden-ratio sections, used when measuring the corpus (T10). Section types
# missing from this table are left out of the measurement.
_CHUNK_SECTION_TO_GOLDEN = {
    "facts": "background",
    "intro": "background",
    "appellant_claims": "claims",
    "respondent_claims": "claims",
    "legal_analysis": "discussion",
    "conclusion": "summary",
    "ruling": "summary",
}

# Memo of the computed measurement; stays None until the first successful run.
_CORPUS_RATIOS_CACHE: dict | None = None


async def measure_corpus_ratios() -> dict:
    """Return average section shares (in percent) measured from ``style_corpus``.

    Every row with non-empty ``full_text`` is split by the chunker; words are
    counted per golden-ratio section (see ``_CHUNK_SECTION_TO_GOLDEN``) and
    turned into percentages of the mapped-word total for that decision.
    Decisions with fewer than 100 mapped words are treated as unparsed and
    dropped.

    Percentages are averaged into an ``"_all"`` bucket and, when
    ``canonical_outcome`` yields a truthy value for the row, into that
    outcome's bucket as well. A section absent from a decision counts as 0.0
    in the average.

    The result is computed once and then served from a module-level cache.

    Returns:
        ``{bucket: {"n": int, "sections": {section: pct}}}`` where ``n`` is
        the number of decisions in the bucket and ``pct`` is rounded to one
        decimal place.
    """
    global _CORPUS_RATIOS_CACHE
    if _CORPUS_RATIOS_CACHE is not None:
        return _CORPUS_RATIOS_CACHE

    # Imported lazily, inside the function, as in the original code.
    from legal_mcp.services.chunker import _split_into_sections

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT full_text, outcome FROM style_corpus WHERE full_text <> ''"
        )

    golden_sections = ("background", "claims", "discussion", "summary")

    # bucket name -> list of per-decision {section: pct} dicts.
    # "_all" collects every usable decision; the per-outcome buckets only fill
    # up for rows whose outcome column resolves to a canonical outcome, so
    # callers should inspect "n" to see which buckets actually hold data.
    buckets: dict[str, list[dict]] = {}
    for row in rows:
        word_counts: dict[str, int] = {}
        for section_type, section_text in _split_into_sections(row["full_text"]):
            golden = _CHUNK_SECTION_TO_GOLDEN.get(section_type)
            if not golden:
                continue
            word_counts[golden] = word_counts.get(golden, 0) + len(section_text.split())

        mapped_total = sum(word_counts.values())
        if mapped_total < 100:
            # Too little mapped text: the section split did not work here.
            continue

        shares = {
            name: count / mapped_total * 100 for name, count in word_counts.items()
        }
        buckets.setdefault("_all", []).append(shares)

        bucket_key = canonical_outcome(row["outcome"] or "")
        if bucket_key:
            buckets.setdefault(bucket_key, []).append(shares)

    measured: dict = {}
    for bucket_key, decisions in buckets.items():
        # Every bucket holds at least one decision (it is created on append),
        # so the division below is always defined.
        averages = {
            name: round(
                sum([d.get(name, 0.0) for d in decisions]) / len(decisions), 1
            )
            for name in golden_sections
        }
        measured[bucket_key] = {"n": len(decisions), "sections": averages}

    _CORPUS_RATIOS_CACHE = measured
    return measured
+ for outcome in VALID_OUTCOMES: + m = measured.get(outcome) + if not m: + continue + tgt = GOLDEN_RATIOS[outcome] + cells = [] + for sec in ("background", "claims", "discussion", "summary"): + val = m["sections"].get(sec) + if val is None: + cells.append("—") + continue + lo, hi = tgt[sec] + cells.append(f"{val}%" + ("" if lo <= val <= hi else " ⚠️")) + result += f"| {outcome_labels[outcome]} (n={m['n']}) | " + " | ".join(cells) + " |\n" + # "_all" aggregate — the meaningful row today (corpus outcome unpopulated); + # shown informationally (no single target to flag against). + allm = measured.get("_all") + if allm: + cells = [f"{allm['sections'].get(s, '—')}%" if allm['sections'].get(s) is not None else "—" + for s in ("background", "claims", "discussion", "summary")] + result += f"| כל ההחלטות (n={allm['n']}) | " + " | ".join(cells) + " |\n" + result += ("\n_⚠️ = הממוצע בפועל חורג מטווח-היעד; שקול לעדכן יעד ב-/methodology או לבדוק את הקורפוס. " + "פיצול לפי-תוצאה יופיע כש-`style_corpus.outcome` יאוכלס._\n\n") + except Exception as e: # surfaced, not swallowed + result += f"_מדידת יחסי-זהב מהקורפוס נכשלה: {e}_\n\n" + # Opening and summary strategies result += "## אסטרטגיות פתיחה וסיכום לפי תוצאה\n\n" for outcome in VALID_OUTCOMES: