"""Backfill style_patterns.frequency with real occurrence counts.

The analyzer currently stores frequency=1 for every pattern (it only extracts
unique patterns, doesn't count occurrences). This script scans the full_text
of every decision in style_corpus and sets each pattern's frequency to the
number of decisions that contain at least one searchable variant of the
pattern (see ``_extract_searchable_variants``).

Run once after analysis, and again whenever new decisions are added.
"""

from __future__ import annotations

import asyncio
import os
import re
import sys
import unicodedata
from pathlib import Path

# Default checkout location of the sources that provide ``legal_mcp``.
# Set LEGAL_MCP_SRC to run the script against a checkout somewhere else.
_DEFAULT_MCP_SRC = "/home/chaim/legal-ai/mcp-server/src"

# Template syntax used by the analyzer when it stores a pattern.
_ALTERNATIVE_SPLIT_RE = re.compile(r"\s*/\s*|\s+או\s+")  # "a / b" or "a או b"
_PLACEHOLDER_RE = re.compile(r"\[[^\]]*\]")  # "[placeholder]"
_ELLIPSIS_RE = re.compile(r"\.{2,}")  # "..." standing for variable text
_SEGMENT_TRIM_CHARS = " ,.:;\"'"
_MIN_VARIANT_LEN = 4  # shorter fragments are too common to be meaningful


def _parse_env_lines(lines) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines of a dotenv-style file into a dict.

    Blank lines, comment lines (``#`` as the first non-blank character), lines
    without ``=`` and lines with an empty key are skipped. Surrounding
    whitespace and quote characters are removed from the value. When a key
    repeats, the first occurrence wins.
    """
    parsed: dict[str, str] = {}
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if key:
            parsed.setdefault(key, value.strip().strip('"').strip("'"))
    return parsed


def _load_env_file(path: Path) -> None:
    """Export the variables of *path* without overriding existing ones.

    A missing file is not an error: the variables may already be present in
    the process environment.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return
    for key, value in _parse_env_lines(text.splitlines()).items():
        os.environ.setdefault(key, value)


def _strip_nikud(text: str) -> str:
    """Remove Hebrew combining marks (nikud) for robust matching.

    The text is decomposed (NFD) and every combining character is dropped, so
    the result stays in decomposed form. Both sides of a comparison must go
    through this function.
    """
    return "".join(
        c for c in unicodedata.normalize("NFD", text)
        if not unicodedata.combining(c)
    )


def _extract_searchable_variants(pattern_text: str) -> list[str]:
    """Extract searchable substrings from a pattern template.

    The analyzer stores patterns as templates with:
    - Placeholders in [brackets]: "בפנינו ערר על החלטת [הגוף] מיום [תאריך]"
    - Alternatives separated by / : "נפנה ל... / ראה והשווה / נפנה להחלטה"
    - Ellipsis ... for variable parts

    For each alternative the longest fixed segment of at least
    ``_MIN_VARIANT_LEN`` characters is returned, so matching is specific
    enough to be meaningful but still flexible.

    Fallback: an alternative with no placeholder or ellipsis whose text only
    reaches the minimum length together with its surrounding punctuation is
    searched for as written. Fragments from either side of a placeholder are
    never glued together, because the joined phrase is not part of the
    template and could only produce false matches.

    Returns the variants in first-seen order without duplicates.
    """
    variants: list[str] = []
    for alt in _ALTERNATIVE_SPLIT_RE.split(pattern_text):
        alt = alt.strip()
        if not alt:
            continue

        # Mark every variable part with "|" so the fixed text can be split out.
        marked = _PLACEHOLDER_RE.sub("|", alt)
        marked = _ELLIPSIS_RE.sub("|", marked).replace("…", "|")

        segments = [s.strip(_SEGMENT_TRIM_CHARS) for s in marked.split("|")]
        usable = [s for s in segments if len(s) >= _MIN_VARIANT_LEN]
        if usable:
            variants.append(max(usable, key=len))
        elif "|" not in marked and len(marked) >= _MIN_VARIANT_LEN:
            variants.append(marked)

    return list(dict.fromkeys(variants))


def _count_decisions_containing(variants: list[str], normalized_decisions: list) -> int:
    """Count how many decisions contain ANY of the variants.

    ``normalized_decisions`` holds ``(id, decision_number, text)`` tuples whose
    text already went through ``_strip_nikud``. Each decision counts once.
    """
    return sum(
        1
        for _, _, text in normalized_decisions
        if any(variant in text for variant in variants)
    )


async def main() -> int:
    """Recompute and store the frequency of every style pattern.

    Returns the process exit code (always 0; database errors propagate).
    """
    # Environment and import path are prepared here rather than at import
    # time so that importing this module has no side effects.
    _load_env_file(Path.home() / ".env")
    sys.path.insert(0, os.environ.get("LEGAL_MCP_SRC", _DEFAULT_MCP_SRC))
    # Must come after the two lines above: the package is only importable once
    # its source directory is on sys.path.
    from legal_mcp.services import db as db_mod

    pool = await db_mod.get_pool()

    async with pool.acquire() as conn:
        decisions = await conn.fetch(
            "SELECT id, decision_number, full_text FROM style_corpus "
            "WHERE full_text IS NOT NULL AND length(full_text) > 0"
        )
        patterns = await conn.fetch(
            "SELECT id, pattern_text, pattern_type FROM style_patterns"
        )

        print(f"Scanning {len(patterns)} patterns across {len(decisions)} decisions...")

        # Normalize decisions once; every pattern is matched against them.
        normalized_decisions = [
            (d["id"], d["decision_number"], _strip_nikud(d["full_text"]))
            for d in decisions
        ]

        updates = []
        for p in patterns:
            pattern_text = p["pattern_text"]
            variants = (
                _extract_searchable_variants(_strip_nikud(pattern_text))
                if pattern_text and len(pattern_text) >= 3
                else []
            )
            # A pattern with nothing searchable is stored with frequency 0.
            count = (
                _count_decisions_containing(variants, normalized_decisions)
                if variants
                else 0
            )
            updates.append((count, p["id"]))

        await conn.executemany(
            "UPDATE style_patterns SET frequency = $1 WHERE id = $2",
            updates,
        )

        # Show distribution
        rows = await conn.fetch(
            "SELECT pattern_type, pattern_text, frequency "
            "FROM style_patterns "
            "ORDER BY frequency DESC "
            "LIMIT 15"
        )
        print("\nTop 15 patterns by real frequency:")
        for r in rows:
            # Both columns may be NULL; formatting None with a width crashes.
            pattern_type = r["pattern_type"] or ""
            preview = (r["pattern_text"] or "")[:90]
            print(f" {r['frequency']:>3} [{pattern_type:<22}] {preview}")

        dist = await conn.fetch(
            "SELECT frequency, count(*) FROM style_patterns "
            "GROUP BY frequency ORDER BY frequency DESC"
        )
        print("\nFrequency distribution:")
        for r in dist:
            print(f" frequency={r['frequency']:>3} → {r['count']} patterns")

    return 0


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
# ── Style Report — visual dashboard data ─────────────────────────


# Hebrew display label of each section type.
_SECTION_TYPE_HEBREW = {
    "intro": "פתיחה",
    "facts": "רקע",
    "appellant_claims": "טענות העורר",
    "respondent_claims": "טענות המשיב",
    "legal_analysis": "דיון משפטי",
    "ruling": "הכרעה",
    "conclusion": "סוף דבר",
}

# Order in which section types are listed in the anatomy payload.
_SECTION_DISPLAY_ORDER = [
    "intro", "facts", "appellant_claims", "respondent_claims",
    "legal_analysis", "ruling", "conclusion",
]

# Template syntax of stored patterns (same rules as
# scripts/backfill_pattern_frequency.py).
_PATTERN_ALTERNATIVE_RE = re.compile(r"\s*/\s*|\s+או\s+")
_PATTERN_PLACEHOLDER_RE = re.compile(r"\[[^\]]*\]")
_PATTERN_ELLIPSIS_RE = re.compile(r"\.{2,}")
_PATTERN_TRIM_CHARS = " ,.:;\"'"
_PATTERN_MIN_VARIANT_LEN = 4


def _strip_nikud(text: str) -> str:
    """Remove Hebrew combining marks (nikud) so matching ignores vocalization.

    The result stays in decomposed (NFD) form; both sides of a comparison
    must go through this function.
    """
    import unicodedata
    return "".join(
        c for c in unicodedata.normalize("NFD", text)
        if not unicodedata.combining(c)
    )


def _extract_pattern_variants(pattern_text: str) -> list[str]:
    """Mirror of scripts/backfill_pattern_frequency.py logic for matching.

    A pattern is a template: alternatives separated by "/" or "או",
    placeholders in [brackets] and "..." for variable text. For every
    alternative the longest fixed segment of at least 4 characters is
    returned. An alternative without any variable part that only reaches
    4 characters together with its punctuation is returned as written, the
    same fallback the backfill script applies, so a pattern counted there is
    also matched here.

    Returns the variants in first-seen order without duplicates.
    """
    variants: list[str] = []
    for alt in _PATTERN_ALTERNATIVE_RE.split(pattern_text):
        alt = alt.strip()
        if not alt:
            continue
        # Mark every variable part with "|" so the fixed text can be split out.
        marked = _PATTERN_PLACEHOLDER_RE.sub("|", alt)
        marked = _PATTERN_ELLIPSIS_RE.sub("|", marked).replace("…", "|")
        segments = [s.strip(_PATTERN_TRIM_CHARS) for s in marked.split("|")]
        usable = [s for s in segments if len(s) >= _PATTERN_MIN_VARIANT_LEN]
        if usable:
            variants.append(max(usable, key=len))
        elif "|" not in marked and len(marked) >= _PATTERN_MIN_VARIANT_LEN:
            variants.append(marked)
    return list(dict.fromkeys(variants))


def _parse_json_list(raw) -> list:
    """Return *raw* as a list, decoding it first when it is a JSON string.

    The column may arrive as a JSON string or as an already decoded value.
    Malformed JSON, NULL and any non-list value yield an empty list, so one
    bad row cannot fail the whole report.
    """
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError
            return []
    return list(raw) if isinstance(raw, (list, tuple)) else []


def _display_phrase(pattern_text: str, limit: int = 60) -> str:
    """Turn a pattern template into a short phrase for a headline.

    Placeholders are removed, whitespace is collapsed, only the first
    alternative is kept and the result is truncated to *limit* characters.
    """
    display = " ".join(_PATTERN_PLACEHOLDER_RE.sub("", pattern_text).split())
    display = display.split(" / ")[0].split(" או ")[0].strip(" .,:;\"'")
    if len(display) > limit:
        display = display[: limit - 3] + "..."
    return display


def _summarize_corpus(stats, decisions) -> dict:
    """Build the hero-section payload from the aggregate row and decision rows."""
    from collections import Counter

    subjects_per_decision = [
        _parse_json_list(d["subject_categories"]) for d in decisions
    ]
    subject_counter = Counter(
        subject for subjects in subjects_per_decision for subject in subjects
    )

    # Cap at top 6 subjects, collapse rest to "אחר"
    top = subject_counter.most_common(6)
    other_count = sum(subject_counter.values()) - sum(count for _, count in top)
    subject_distribution = [{"label": label, "count": count} for label, count in top]
    if other_count > 0:
        subject_distribution.append({"label": "אחר", "count": other_count})

    n = stats["n"]
    # sum()/avg() are NULL on an empty corpus; formatting None with "," raises.
    total_chars = stats["total_chars"] or 0
    avg_chars = stats["avg_chars"] or 0

    headline = f"קראתי {n} מההחלטות שלך. ממוצע {avg_chars:,} תווים לכל החלטה"
    if top:
        top_label, top_count = top[0]
        headline += f", הנושא הנפוץ אצלך: {top_label} ({top_count} החלטות)"

    return {
        "decision_count": n,
        "total_chars": total_chars,
        "avg_chars": avg_chars,
        "date_range": [
            str(stats["min_date"]) if stats["min_date"] else None,
            str(stats["max_date"]) if stats["max_date"] else None,
        ],
        "decisions": [
            {
                "number": d["decision_number"] or "",
                "date": str(d["decision_date"]) if d["decision_date"] else "",
                "chars": d["chars"],
                "subjects": subjects,
            }
            for d, subjects in zip(decisions, subjects_per_decision)
        ],
        "subject_distribution": subject_distribution,
        "headline": headline,
    }


async def _compute_corpus_stats(conn) -> dict:
    """Hero section: decision count, chars, subject distribution, timeline."""
    stats = await conn.fetchrow(
        "SELECT count(*) as n, "
        " sum(length(full_text)) as total_chars, "
        " avg(length(full_text))::int as avg_chars, "
        " min(decision_date) as min_date, "
        " max(decision_date) as max_date "
        "FROM style_corpus"
    )
    decisions = await conn.fetch(
        "SELECT decision_number, decision_date, length(full_text) as chars, "
        " subject_categories "
        "FROM style_corpus ORDER BY decision_date NULLS LAST"
    )
    return _summarize_corpus(stats, decisions)


async def _compute_anatomy(conn) -> dict:
    """Section 2: average section lengths across the training corpus.

    Only chunks of documents whose title starts with "[קורפוס]" are counted.
    ``avg_chars`` is averaged over the documents that contain the section;
    ``pct`` is the section's share of all section-typed characters.
    """
    rows = await conn.fetch(
        """
        SELECT dc.section_type,
               sum(length(dc.content))::int as total_chars,
               count(distinct dc.document_id) as docs
        FROM document_chunks dc
        JOIN documents d ON dc.document_id = d.id
        WHERE d.title LIKE '[קורפוס]%'
          AND dc.section_type IS NOT NULL
        GROUP BY dc.section_type
        """
    )

    if not rows:
        return {
            "sections": [],
            "total_coverage": 0,
            "headline": "אין עדיין נתונים על מבנה ההחלטות",
        }

    rows_by_type = {r["section_type"]: r for r in rows}
    # sum() is NULL when every chunk of a section has NULL content.
    total_all_chars = sum(r["total_chars"] or 0 for r in rows)

    items = []
    for section_type in _SECTION_DISPLAY_ORDER:
        row = rows_by_type.get(section_type)
        if row is None:
            continue
        chars = row["total_chars"] or 0
        docs = row["docs"]
        items.append({
            "type": section_type,
            "label": _SECTION_TYPE_HEBREW.get(section_type, section_type),
            "avg_chars": round(chars / docs) if docs else 0,
            "pct": round(chars / total_all_chars, 4) if total_all_chars else 0,
            "coverage": docs,
        })

    # Max coverage (decisions that had any chunks)
    total_coverage = await conn.fetchval(
        "SELECT count(distinct dc.document_id) "
        "FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
        "WHERE d.title LIKE '[קורפוס]%'"
    )

    # Headline: biggest section
    if items:
        biggest = max(items, key=lambda item: item["pct"])
        pct_int = round(biggest["pct"] * 100)
        headline = f"{biggest['label']} הוא {pct_int}% מכל החלטה אצלך — זה המוקד שלך"
    else:
        headline = ""

    return {
        "sections": items,
        "total_coverage": total_coverage,
        "headline": headline,
    }


async def _compute_signature_phrases(conn) -> dict:
    """Section 3: all patterns with real frequencies, plus headline about top."""
    rows = await conn.fetch(
        "SELECT pattern_type, pattern_text, context, frequency, examples "
        "FROM style_patterns "
        "WHERE frequency > 0 "
        "ORDER BY frequency DESC"
    )

    items = [
        {
            "type": r["pattern_type"],
            "text": r["pattern_text"] or "",
            "context": r["context"] or "",
            "frequency": r["frequency"],
            "examples": _parse_json_list(r["examples"]),
        }
        for r in rows
    ]

    # Total decision count for denominator
    total_decisions = await conn.fetchval("SELECT count(*) FROM style_corpus")

    if items:
        top = items[0]
        display = _display_phrase(top["text"])
        headline = f'הפטרן האהוב עלייך: "{display}" — מופיע ב-{top["frequency"]} מתוך {total_decisions} החלטות'
    else:
        headline = "טרם חולצו דפוסים — הרץ ניתוח קורפוס"

    return {"items": items, "total_decisions": total_decisions, "headline": headline}


def _build_contribution(decisions, patterns) -> dict:
    """Compute per-decision contributions and the cumulative growth curve.

    *decisions* must be in chronological order. A pattern is "new" in the
    first decision that contains one of its variants and "confirmed" in every
    later decision that contains it. Patterns found in no decision are
    ignored.
    """
    if not decisions or not patterns:
        return {
            "growth_curve": [],
            "decision_contributions": [],
            "total_patterns": 0,
            "headline": "אין עדיין מספיק נתונים",
        }

    # Normalize texts once; full_text may be NULL.
    normalized = [(d["id"], _strip_nikud(d["full_text"] or "")) for d in decisions]

    introduced: dict = {}  # decision id → patterns first seen in that decision
    confirmed: dict = {}   # decision id → number of already known patterns found
    total_patterns = 0
    for p in patterns:
        variants = _extract_pattern_variants(_strip_nikud(p["pattern_text"] or ""))
        if not variants:
            continue
        matches = [
            dec_id for dec_id, text in normalized
            if any(variant in text for variant in variants)
        ]
        if not matches:
            continue
        total_patterns += 1
        introduced.setdefault(matches[0], []).append({
            "type": p["pattern_type"],
            "text": p["pattern_text"],
            "context": p["context"] or "",
        })
        for dec_id in matches[1:]:
            confirmed[dec_id] = confirmed.get(dec_id, 0) + 1

    decision_contributions = []
    growth_curve = []
    cumulative = 0
    for d in decisions:
        new_patterns = introduced.get(d["id"], [])
        # The first new pattern of the decision is shown as its highlight.
        highlight = new_patterns[0] if new_patterns else None
        number = d["decision_number"] or ""
        date = str(d["decision_date"]) if d["decision_date"] else ""

        decision_contributions.append({
            "decision_number": number,
            "decision_date": date,
            "chars": d["chars"],
            "subjects": _parse_json_list(d["subject_categories"]),
            "new_count": len(new_patterns),
            "confirmed_count": confirmed.get(d["id"], 0),
            # cap to keep payload small
            "new_patterns": [dict(p) for p in new_patterns[:10]],
            "highlight": (
                {"type": highlight["type"], "text": highlight["text"]}
                if highlight else None
            ),
        })

        cumulative += len(new_patterns)
        growth_curve.append({
            "decision_number": number,
            "date": date,
            "cumulative": cumulative,
        })

    # Headline: after how many decisions were 85% of the patterns known?
    # The threshold is rounded up in integer arithmetic; with nothing matched
    # there is no such point at all.
    threshold = -(-total_patterns * 85 // 100)
    n_decisions_to_85pct = None
    if total_patterns:
        n_decisions_to_85pct = next(
            (
                i for i, point in enumerate(growth_curve, 1)
                if point["cumulative"] >= threshold
            ),
            None,
        )

    if n_decisions_to_85pct:
        headline = (
            f"אחרי {n_decisions_to_85pct} החלטות כבר למדתי 85% "
            f"מהסגנון שלך — השאר מיקד וחידד את הידע"
        )
    else:
        headline = f"למדתי {total_patterns} דפוסים מ-{len(decisions)} החלטות"

    return {
        "growth_curve": growth_curve,
        "decision_contributions": decision_contributions,
        "total_patterns": total_patterns,
        "headline": headline,
    }


async def _compute_contribution(conn) -> dict:
    """Section 4: per-decision contribution + growth curve."""
    decisions = await conn.fetch(
        "SELECT id, decision_number, decision_date, full_text, "
        " length(full_text) as chars, subject_categories "
        "FROM style_corpus ORDER BY decision_date NULLS LAST, created_at"
    )
    patterns = await conn.fetch(
        "SELECT id, pattern_type, pattern_text, context "
        "FROM style_patterns WHERE frequency > 0"
    )
    return _build_contribution(decisions, patterns)
@app.get("/api/training/style-report")
async def training_style_report():
    """Return the data behind the Style Portrait dashboard page.

    The four report sections are computed one after another on a single
    connection taken from the pool and returned under the keys ``corpus``,
    ``anatomy``, ``signature_phrases`` and ``contribution``.
    """
    section_builders = (
        ("corpus", _compute_corpus_stats),
        ("anatomy", _compute_anatomy),
        ("signature_phrases", _compute_signature_phrases),
        ("contribution", _compute_contribution),
    )

    report = {}
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        # Sequential on purpose: all four builders share this one connection.
        for key, build_section in section_builders:
            report[key] = await build_section(conn)

    return report
rgba(0,0,0,0.06), 0 0 1px rgba(0,0,0,0.08); +} +.portrait-section-title { + font-size: 1.3em; font-weight: 600; color: #1a1a2e; + margin-bottom: 8px; padding-bottom: 10px; + border-bottom: 2px solid #f0f0f0; +} +.portrait-headline { + font-size: 1.05em; color: #555; line-height: 1.6; + margin-bottom: 20px; padding: 12px 16px; + background: #fff9ed; border-right: 3px solid #e9a13f; + border-radius: 4px; +} + +/* Hero section */ +.portrait-hero .hero-body { + display: grid; grid-template-columns: 1fr auto; gap: 32px; align-items: center; + margin-bottom: 24px; +} +.hero-stats { display: grid; grid-template-columns: repeat(3, 1fr); gap: 20px; } +.hero-stat { text-align: center; padding: 14px; background: #fafafa; border-radius: 8px; } +.hero-stat-value { font-size: 1.9em; font-weight: 700; color: #1a1a2e; line-height: 1; } +.hero-stat-label { font-size: 0.8em; color: #888; margin-top: 6px; } + +.hero-donut-wrap { display: flex; align-items: center; gap: 20px; } +.donut { + width: 160px; height: 160px; border-radius: 50%; + position: relative; flex-shrink: 0; +} +.donut::after { + content: ''; position: absolute; inset: 24%; + background: #fff; border-radius: 50%; +} +.donut-center { + position: absolute; inset: 0; display: flex; + align-items: center; justify-content: center; + font-size: 0.85em; color: #666; z-index: 1; font-weight: 600; +} +.donut-legend { + display: flex; flex-direction: column; gap: 6px; font-size: 0.82em; +} +.donut-legend-item { + display: flex; align-items: center; gap: 8px; +} +.donut-legend-dot { + width: 11px; height: 11px; border-radius: 50%; flex-shrink: 0; +} + +.hero-timeline { + position: relative; height: 44px; margin-top: 8px; + background: linear-gradient(to left, #fafafa, #fff, #fafafa); + border-radius: 6px; padding: 0 16px; +} +.hero-timeline-line { + position: absolute; top: 50%; right: 16px; left: 16px; + height: 2px; background: #e5e5e5; transform: translateY(-50%); +} +.hero-timeline-dot { + position: absolute; top: 50%; 
width: 10px; height: 10px; + border-radius: 50%; background: #e94560; transform: translate(50%, -50%); + cursor: pointer; transition: transform 0.15s; + box-shadow: 0 0 0 2px #fff; +} +.hero-timeline-dot:hover { transform: translate(50%, -50%) scale(1.4); z-index: 1; } + +/* Anatomy section */ +.anatomy-bar { + display: flex; width: 100%; height: 56px; + border-radius: 8px; overflow: hidden; + box-shadow: inset 0 1px 3px rgba(0,0,0,0.06); +} +.anatomy-segment { + display: flex; align-items: center; justify-content: center; + font-size: 0.82em; font-weight: 600; color: #fff; + transition: filter 0.15s; position: relative; cursor: help; + text-align: center; padding: 0 4px; overflow: hidden; +} +.anatomy-segment:hover { filter: brightness(1.08); } +.anatomy-segment small { + display: block; font-size: 0.72em; font-weight: 400; opacity: 0.85; +} +.anatomy-legend { + display: flex; flex-wrap: wrap; gap: 14px; + margin-top: 14px; font-size: 0.8em; color: #666; +} +.anatomy-legend-item { display: flex; align-items: center; gap: 6px; } +.anatomy-legend-dot { + width: 10px; height: 10px; border-radius: 2px; flex-shrink: 0; +} + +/* Phrase wall */ +.phrase-filters { + display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 16px; +} +.phrase-filter { + padding: 6px 14px; border-radius: 18px; + border: 1px solid #ddd; background: #fff; + font-size: 0.82em; cursor: pointer; transition: all 0.12s; +} +.phrase-filter:hover { background: #f5f5f5; } +.phrase-filter.active { + background: #1a1a2e; color: #fff; border-color: #1a1a2e; +} +.phrase-wall { + display: grid; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); + gap: 12px; +} +.phrase-card { + padding: 14px 16px; border-radius: 8px; background: #fafafa; + border-right: 3px solid; cursor: pointer; transition: all 0.15s; + display: flex; flex-direction: column; gap: 6px; +} +.phrase-card:hover { background: #fff; transform: translateY(-1px); box-shadow: 0 2px 8px rgba(0,0,0,0.08); } +.phrase-card-text { 
font-weight: 500; color: #1a1a2e; line-height: 1.4; } +.phrase-card-meta { + display: flex; justify-content: space-between; font-size: 0.75em; color: #999; + margin-top: auto; +} +.phrase-card-freq { + background: #fff; padding: 2px 8px; border-radius: 10px; font-weight: 600; +} + +/* Growth curve + Contribution */ +.growth-curve-wrap { margin-bottom: 24px; } +.growth-curve-label { font-size: 0.82em; color: #888; margin-bottom: 8px; } +.growth-curve { + width: 100%; height: 160px; + background: linear-gradient(to bottom, #fafafa, #fff); + border-radius: 6px; +} +.growth-curve-path { fill: none; stroke: #e94560; stroke-width: 2.5; } +.growth-curve-area { fill: #fce4e9; opacity: 0.6; } +.growth-curve-dot { fill: #e94560; cursor: pointer; transition: r 0.15s; } +.growth-curve-dot:hover { r: 6; } + +.contribution-grid { + display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); + gap: 12px; +} +.contribution-card { + padding: 14px 16px; background: #fafafa; border-radius: 8px; + border: 1px solid #eee; + transition: all 0.15s; +} +.contribution-card:hover { background: #fff; box-shadow: 0 2px 6px rgba(0,0,0,0.05); } +.contribution-card-header { + display: flex; justify-content: space-between; align-items: center; + margin-bottom: 8px; font-size: 0.82em; color: #555; +} +.contribution-card-number { font-weight: 600; color: #1a1a2e; font-size: 1em; } +.contribution-badges { display: flex; gap: 8px; margin: 10px 0; } +.contribution-badge { + padding: 4px 10px; border-radius: 10px; font-size: 0.78em; font-weight: 600; +} +.contribution-badge.new { background: #e8f5e9; color: #2e7d32; } +.contribution-badge.confirmed { background: #e3f2fd; color: #1565c0; } +.contribution-highlight { + font-size: 0.82em; color: #666; margin-top: 8px; padding-top: 8px; + border-top: 1px dashed #e5e5e5; line-height: 1.5; +} +.contribution-highlight strong { color: #1a1a2e; } + +/* Modal */ +.phrase-modal { + border: none; border-radius: 12px; padding: 24px 28px; + 
max-width: 640px; width: 90%; direction: rtl; + box-shadow: 0 20px 60px rgba(0,0,0,0.2); +} +.phrase-modal::backdrop { background: rgba(0,0,0,0.4); } +.phrase-modal-header { + display: flex; justify-content: space-between; align-items: center; + margin-bottom: 14px; padding-bottom: 12px; border-bottom: 1px solid #eee; +} +.phrase-modal-type { + font-size: 0.78em; color: #888; text-transform: uppercase; + letter-spacing: 0.05em; +} +.phrase-modal-text { + font-size: 1.1em; font-weight: 600; color: #1a1a2e; + margin-bottom: 12px; line-height: 1.5; +} +.phrase-modal-context { + font-size: 0.88em; color: #666; margin-bottom: 16px; line-height: 1.6; +} +.phrase-modal-examples { + display: flex; flex-direction: column; gap: 10px; + max-height: 300px; overflow-y: auto; +} +.phrase-modal-example { + padding: 10px 14px; background: #fafafa; border-right: 3px solid #e94560; + font-size: 0.86em; line-height: 1.5; color: #333; border-radius: 4px; +} + @media (max-width: 800px) { .main { padding: 16px; } header { padding: 14px 16px; } @@ -417,6 +617,7 @@ header nav a:hover, header nav a.active { color: #fff; background: rgba(255,255, + תיק חדש העלאה אימון סגנון + הסגנון שלי Skills @@ -735,9 +936,83 @@ header nav a:hover, header nav a.active { color: #fff; background: rgba(255,255,
טוען...
+ +
+ + ← צפי בפורטרט הסגנון המלא + +
+ + + +
+
+

פורטרט הסגנון שלך

+

דוח ויזואלי על סמך הקורפוס שלמדתי ממך

+
+ +
+
+
טוען את הפורטרט...
+
+ +
+ + +
+ + +
+
+
+
+
+
תיקים:
@@ -803,6 +1078,11 @@ function handleRoute() { document.getElementById('navTraining').classList.add('active'); subtitle = 'אימון סגנון'; initTrainingPage(); + } else if (hash === '#/style-report') { + document.getElementById('page-style-report').classList.add('active'); + document.getElementById('navStyleReport').classList.add('active'); + subtitle = 'פורטרט הסגנון שלי'; + loadStyleReport(); } document.getElementById('pageSubtitle').textContent = subtitle; @@ -2086,6 +2366,288 @@ async function pollStyleAnalysisStatus() { } } +// ── Style Report Page ──────────────────────────────────── + +const PATTERN_TYPE_COLORS = { + opening_formula: '#5e9a6e', + closing_formula: '#c87533', + transition: '#4e7cb3', + characteristic_phrase: '#a7547c', + argument_flow: '#7e5c9a', + analysis_structure: '#3e8583', + evidence_handling: '#b8894a', + citation_style: '#5f6b8c', +}; + +const SECTION_COLORS = { + intro: '#4e7cb3', + facts: '#5e9a6e', + appellant_claims: '#a7547c', + respondent_claims: '#c87533', + legal_analysis: '#7e5c9a', + ruling: '#3e8583', + conclusion: '#b8894a', +}; + +const DONUT_COLORS = ['#e94560', '#5e9a6e', '#4e7cb3', '#a7547c', '#c87533', '#7e5c9a', '#b8894a']; + +let _styleReportData = null; +let _activeFilter = 'all'; + +async function loadStyleReport() { + document.getElementById('styleReportLoading').style.display = ''; + document.getElementById('styleReportContent').style.display = 'none'; + try { + const res = await fetch(API + '/training/style-report'); + if (!res.ok) throw new Error('Failed to load report'); + _styleReportData = await res.json(); + renderHero(_styleReportData.corpus); + renderAnatomy(_styleReportData.anatomy); + renderPhrases(_styleReportData.signature_phrases); + renderContribution(_styleReportData.contribution); + document.getElementById('styleReportLoading').style.display = 'none'; + document.getElementById('styleReportContent').style.display = ''; + } catch (e) { + document.getElementById('styleReportLoading').innerHTML = `
שגיאה: ${esc(e.message)}
`; + } +} + +function renderHero(corpus) { + document.getElementById('heroHeadline').textContent = '★ ' + corpus.headline; + + document.getElementById('heroStats').innerHTML = ` +
+
${corpus.decision_count}
+
החלטות בקורפוס
+
+
+
${(corpus.total_chars / 1000).toFixed(0)}K
+
סך תווים
+
+
+
${(corpus.avg_chars / 1000).toFixed(0)}K
+
ממוצע להחלטה
+
+ `; + + // Donut (CSS conic-gradient) + const total = corpus.subject_distribution.reduce((a, b) => a + b.count, 0); + let pct = 0; + const segments = corpus.subject_distribution.map((s, i) => { + const start = (pct / total) * 360; + pct += s.count; + const end = (pct / total) * 360; + const color = DONUT_COLORS[i % DONUT_COLORS.length]; + return `${color} ${start}deg ${end}deg`; + }).join(', '); + + const donut = document.getElementById('heroDonut'); + donut.style.background = `conic-gradient(${segments})`; + donut.innerHTML = `
${corpus.decision_count} החלטות
`; + + document.getElementById('heroDonutLegend').innerHTML = corpus.subject_distribution.map((s, i) => ` +
+ + ${esc(s.label)} · ${s.count} +
+ `).join(''); + + // Timeline + const tl = document.getElementById('heroTimeline'); + const dated = corpus.decisions.filter(d => d.date); + if (dated.length < 2) { + tl.innerHTML = ''; + return; + } + const dates = dated.map(d => new Date(d.date).getTime()); + const minT = Math.min(...dates); + const maxT = Math.max(...dates); + const range = maxT - minT || 1; + + let html = '
'; + dated.forEach(d => { + const t = new Date(d.date).getTime(); + const pct = ((t - minT) / range) * 100; + html += `
`; + }); + tl.innerHTML = html; +} + +function renderAnatomy(anatomy) { + document.getElementById('anatomyHeadline').textContent = '★ ' + anatomy.headline; + + if (!anatomy.sections || !anatomy.sections.length) { + document.getElementById('anatomyBar').innerHTML = '
אין עדיין נתונים
'; + return; + } + + const bar = document.getElementById('anatomyBar'); + bar.innerHTML = anatomy.sections.map(s => { + const color = SECTION_COLORS[s.type] || '#888'; + const pctDisplay = Math.round(s.pct * 100); + return ` +
+
+
${esc(s.label)}
+ ${pctDisplay}% +
+
+ `; + }).join(''); + + document.getElementById('anatomyLegend').innerHTML = anatomy.sections.map(s => ` +
+ + ${esc(s.label)} · ממוצע ${s.avg_chars.toLocaleString('he-IL')} תווים · ${s.coverage} החלטות +
+ `).join(''); +} + +function renderPhrases(phrases) { + document.getElementById('phrasesHeadline').textContent = '★ ' + phrases.headline; + + // Build filter chips — one per pattern_type that appears + const types = [...new Set(phrases.items.map(p => p.type))]; + const typeLabels = { + opening_formula: 'פתיחה', + closing_formula: 'סיום', + transition: 'מעברים', + characteristic_phrase: 'ביטויים', + argument_flow: 'טיעון', + analysis_structure: 'מבנה', + evidence_handling: 'ראיות', + citation_style: 'ציטוט', + }; + + const filters = [{ id: 'all', label: 'הכל' }] + .concat(types.map(t => ({ id: t, label: typeLabels[t] || t }))); + document.getElementById('phraseFilters').innerHTML = filters.map(f => ` +
${esc(f.label)}
+ `).join(''); + + renderPhraseWall(phrases.items); +} + +function setPhraseFilter(filterId) { + _activeFilter = filterId; + document.querySelectorAll('.phrase-filter').forEach(el => { + el.classList.toggle('active', el.dataset.filter === filterId); + }); + renderPhraseWall(_styleReportData.signature_phrases.items); +} + +function renderPhraseWall(items) { + const filtered = _activeFilter === 'all' + ? items + : items.filter(p => p.type === _activeFilter); + + document.getElementById('phraseWall').innerHTML = filtered.map((p, idx) => { + // Clean display text — first alternative, strip placeholders + let display = p.text.replace(/\[[^\]]*\]/g, '').replace(/\s+/g, ' ').trim(); + display = display.split(' / ')[0].split(' או ')[0].trim(); + if (display.length > 80) display = display.substring(0, 77) + '...'; + + const color = PATTERN_TYPE_COLORS[p.type] || '#888'; + const origIdx = items.indexOf(p); + return ` +
+
${esc(display)}
+
+ ${esc(p.context.substring(0, 40))} + ${p.frequency}/24 +
+
+ `; + }).join(''); +} + +function showPhraseModal(idx) { + const p = _styleReportData.signature_phrases.items[idx]; + if (!p) return; + const typeLabels = { + opening_formula: 'נוסחת פתיחה', + closing_formula: 'נוסחת סיום', + transition: 'ביטוי מעבר', + characteristic_phrase: 'ביטוי אופייני', + argument_flow: 'זרימת טיעון', + analysis_structure: 'מבנה ניתוח', + evidence_handling: 'טיפול בראיות', + citation_style: 'סגנון ציטוט', + }; + document.getElementById('phraseModalType').textContent = + (typeLabels[p.type] || p.type) + ` · ${p.frequency}/24 החלטות`; + document.getElementById('phraseModalText').textContent = p.text; + document.getElementById('phraseModalContext').textContent = p.context || ''; + const examples = (p.examples || []).filter(e => e && e.length > 0); + document.getElementById('phraseModalExamples').innerHTML = examples.length + ? examples.map(e => `
${esc(e)}
`).join('') + : '
אין דוגמאות שמורות
'; + document.getElementById('phraseModal').showModal(); +} + +function renderContribution(contrib) { + document.getElementById('contributionHeadline').textContent = '★ ' + contrib.headline; + + // Growth curve — SVG polyline + const points = contrib.growth_curve; + if (points.length >= 2) { + const w = 800, h = 160, pad = 20; + const maxY = Math.max(...points.map(p => p.cumulative)) || 1; + const step = (w - pad * 2) / (points.length - 1); + // RTL: right = start, so reverse X + const coords = points.map((p, i) => { + const x = w - pad - i * step; + const y = h - pad - (p.cumulative / maxY) * (h - pad * 2); + return { x, y, ...p }; + }); + const path = coords.map((c, i) => `${i === 0 ? 'M' : 'L'} ${c.x.toFixed(1)} ${c.y.toFixed(1)}`).join(' '); + const areaPath = path + ` L ${coords[coords.length - 1].x} ${h - pad} L ${coords[0].x} ${h - pad} Z`; + + const svg = document.getElementById('growthCurve'); + svg.innerHTML = ` + + + ${coords.map(c => ` + + ${esc(c.decision_number || 'ללא מספר')}: ${c.cumulative} דפוסים מצטברים + + `).join('')} + `; + } + + // Contribution cards — sort by date + const cards = contrib.decision_contributions; + document.getElementById('contributionGrid').innerHTML = cards.map(d => { + const highlight = d.highlight; + let highlightDisplay = ''; + if (highlight) { + let text = highlight.text.replace(/\[[^\]]*\]/g, '').replace(/\s+/g, ' ').trim(); + text = text.split(' / ')[0].split(' או ')[0].trim(); + if (text.length > 80) text = text.substring(0, 77) + '...'; + highlightDisplay = ` +
+ ▸ תרומה בולטת: "${esc(text)}" +
+ `; + } + return ` +
+
+ ${esc(d.decision_number || 'ללא מספר')} + ${esc(d.decision_date || '—')} +
+
+ 🟢 ${d.new_count} חדשים + 🔵 ${d.confirmed_count} חיזקה +
+ ${highlightDisplay} +
+ `; + }).join(''); +} + async function loadCorpusList() { const container = document.getElementById('corpusList'); const count = document.getElementById('corpusCount');