From 2fbc0cd3c28f546f2c4c8a4df8be9cda82d707e0 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 7 Jun 2026 21:04:47 +0000 Subject: [PATCH] feat(graph): centrality + cluster analytics (corpus graph PR B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Obsidian "Graph Analysis" equivalent — surfaces influence and structure beyond raw citation count. Backend (new web/graph_metrics.py — pure, dependency-free, no DB → G2): - PageRank (power-iteration), betweenness (Brandes), community (deterministic label-propagation; isolated nodes stay singleton clusters), computed in-memory over the precedent citation subgraph that build_corpus_graph already fetched. Normalized 0–1; community ints dense-ranked by size (stable colours). - GraphNode += pagerank/betweenness/community (None unless metrics=true). - build_corpus_graph + /api/graph/corpus gain a metrics flag (default false; default path unchanged). Validated on the live corpus: 147 nodes in 13ms. Frontend: - graph.ts: GraphNode metrics fields + metrics param. - graph-canvas: color-by (type | practice_area | precedent_level | community | recency) and size-by (in-degree | pagerank | betweenness) via colorForNode / radiusForNode; exported palettes. - graph-view: colorBy/sizeBy controls; metrics requested only when needed; global metrics overlaid onto neighborhood nodes by id (a node's PageRank shouldn't change when focused); a ranking panel (Tabs: המשפיעות / גשרים, click → focus); dynamic legend per color-by. - graph-filter-panel: "צביעה לפי" + "גודל נקודה לפי" Selects. web-ui build + lint pass. Invariants: G2 (metrics pure, no DB writes), UI2 (model grows on explicit Pydantic). api:types post-deploy. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- web-ui/src/components/graph/graph-canvas.tsx | 82 +++++++- .../components/graph/graph-filter-panel.tsx | 57 ++++++ web-ui/src/components/graph/graph-view.tsx | 181 ++++++++++++++++-- web-ui/src/lib/api/graph.ts | 5 + web/app.py | 2 + web/graph_api.py | 31 +++ web/graph_metrics.py | 158 +++++++++++++++ 7 files changed, 497 insertions(+), 19 deletions(-) create mode 100644 web/graph_metrics.py diff --git a/web-ui/src/components/graph/graph-canvas.tsx b/web-ui/src/components/graph/graph-canvas.tsx index ccb7c9f..742e100 100644 --- a/web-ui/src/components/graph/graph-canvas.tsx +++ b/web-ui/src/components/graph/graph-canvas.tsx @@ -37,6 +37,14 @@ type FGLink = { treatment: string | null; }; +export type ColorBy = + | "type" + | "practice_area" + | "community" + | "precedent_level" + | "recency"; +export type SizeBy = "indegree" | "pagerank" | "betweenness"; + const NODE_COLORS: Record = { precedent: "#1e3a5f", // navy halacha: "#b45309", // amber @@ -51,8 +59,66 @@ const TREATMENT_COLORS: Record = { distinguished: "#d97706", }; -function nodeRadius(n: GraphNode): number { +// Distinct, cyclic palette for community (cluster) colouring. +export const COMMUNITY_PALETTE = [ + "#1e3a5f", "#a97d3a", "#3b7a57", "#8c3b4a", "#5b4b8a", "#b06a2c", + "#2f6f7a", "#9a4f6a", "#46688a", "#7d6b3a", "#6b7280", "#4d7c5a", +]; + +// Authority hierarchy: עליון darkest → ועדת ערר lightest. +export const LEVEL_COLORS: Record = { + עליון: "#13294b", + מנהלי: "#3b6ea5", + ועדת_ערר_מחוזית: "#8fb0cf", +}; + +export const PA_COLORS: Record = { + rishuy_uvniya: "#1e3a5f", + betterment_levy: "#a97d3a", + compensation_197: "#3b7a57", +}; + +const FALLBACK_COLOR = "#94a3b8"; + +/** Old (slate) → recent (gold) gradient over 1994–2026. 
*/ +export function recencyColor(dateStr: string | null): string { + if (!dateStr) return FALLBACK_COLOR; + const y = Number(dateStr.slice(0, 4)); + if (!y) return FALLBACK_COLOR; + const t = Math.max(0, Math.min(1, (y - 1994) / (2026 - 1994))); + const oldC = [100, 116, 139]; + const newC = [169, 125, 58]; + const c = oldC.map((o, i) => Math.round(o + (newC[i] - o) * t)); + return `rgb(${c[0]},${c[1]},${c[2]})`; +} + +export function colorForNode(n: GraphNode, colorBy: ColorBy): string { + // Hubs always keep their type colour — only precedents recolour. + if (n.type !== "precedent") return NODE_COLORS[n.type] ?? FALLBACK_COLOR; + switch (colorBy) { + case "practice_area": + return PA_COLORS[n.practice_area ?? ""] ?? FALLBACK_COLOR; + case "community": + return n.community != null + ? COMMUNITY_PALETTE[n.community % COMMUNITY_PALETTE.length] + : FALLBACK_COLOR; + case "precedent_level": + return LEVEL_COLORS[n.precedent_level ?? ""] ?? FALLBACK_COLOR; + case "recency": + return recencyColor(n.date); + default: + return NODE_COLORS.precedent; + } +} + +export function radiusForNode(n: GraphNode, sizeBy: SizeBy): number { if (n.type === "topic" || n.type === "practice_area") return 5; + if (sizeBy === "pagerank" && n.pagerank != null) { + return 3 + Math.sqrt(n.pagerank) * 18; + } + if (sizeBy === "betweenness" && n.betweenness != null) { + return 3 + Math.sqrt(n.betweenness) * 18; + } return Math.min(22, 3 + Math.sqrt(Math.max(0, n.size)) * 1.7); } @@ -76,10 +142,14 @@ export function GraphCanvas({ data, selectedId, onNodeClick, + colorBy = "type", + sizeBy = "indegree", }: { data: CorpusGraph | undefined; selectedId: string | null; onNodeClick: (node: GraphNode) => void; + colorBy?: ColorBy; + sizeBy?: SizeBy; }) { const { ref, size } = useElementSize(); // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -143,9 +213,9 @@ export function GraphCanvas({ const drawNode = useCallback( (node: FGNode, ctx: CanvasRenderingContext2D, globalScale: number) => 
{ - const r = nodeRadius(node); + const r = radiusForNode(node, sizeBy); const dimmed = isDimmed(node.id); - const color = NODE_COLORS[node.type] ?? "#64748b"; + const color = colorForNode(node, colorBy); ctx.globalAlpha = dimmed ? 0.18 : 1; ctx.beginPath(); @@ -194,17 +264,17 @@ export function GraphCanvas({ } ctx.globalAlpha = 1; }, - [activeId, isDimmed], + [activeId, isDimmed, colorBy, sizeBy], ); const drawPointerArea = useCallback( (node: FGNode, color: string, ctx: CanvasRenderingContext2D) => { ctx.fillStyle = color; ctx.beginPath(); - ctx.arc(node.x ?? 0, node.y ?? 0, nodeRadius(node) + 2, 0, 2 * Math.PI); + ctx.arc(node.x ?? 0, node.y ?? 0, radiusForNode(node, sizeBy) + 2, 0, 2 * Math.PI); ctx.fill(); }, - [], + [sizeBy], ); const linkColor = useCallback( diff --git a/web-ui/src/components/graph/graph-filter-panel.tsx b/web-ui/src/components/graph/graph-filter-panel.tsx index feeb008..778830c 100644 --- a/web-ui/src/components/graph/graph-filter-panel.tsx +++ b/web-ui/src/components/graph/graph-filter-panel.tsx @@ -25,6 +25,7 @@ import { SelectValue, } from "@/components/ui/select"; import type { GraphFacets } from "@/lib/api/graph"; +import type { ColorBy, SizeBy } from "@/components/graph/graph-canvas"; export type GraphControls = { practiceArea: string; @@ -37,6 +38,8 @@ export type GraphControls = { district: string; yearFrom: number; yearTo: number; + colorBy: ColorBy; + sizeBy: SizeBy; showTopics: boolean; showPracticeAreas: boolean; showHalachot: boolean; @@ -45,6 +48,20 @@ export type GraphControls = { const ALL = "__all__"; const YEARS = Array.from({ length: 2026 - 1994 + 1 }, (_, i) => 2026 - i); +const COLOR_BY: { value: ColorBy; label: string }[] = [ + { value: "type", label: "סוג נקודה" }, + { value: "practice_area", label: "תחום" }, + { value: "precedent_level", label: "דרגת סמכות" }, + { value: "community", label: "אשכול (זיהוי אוטומטי)" }, + { value: "recency", label: "עדכניות" }, +]; + +const SIZE_BY: { value: SizeBy; label: string }[] = 
[ + { value: "indegree", label: "ציטוטים נכנסים" }, + { value: "pagerank", label: "השפעה (PageRank)" }, + { value: "betweenness", label: "גשריות (Betweenness)" }, +]; + const PRACTICE_AREAS: { value: string; label: string }[] = [ { value: "rishuy_uvniya", label: "רישוי ובנייה" }, { value: "betterment_levy", label: "היטל השבחה" }, @@ -142,6 +159,46 @@ export function GraphFilterPanel({ + + +
+ + +
+ +
+ + +
+ diff --git a/web-ui/src/components/graph/graph-view.tsx b/web-ui/src/components/graph/graph-view.tsx index f714c6b..781692b 100644 --- a/web-ui/src/components/graph/graph-view.tsx +++ b/web-ui/src/components/graph/graph-view.tsx @@ -3,12 +3,20 @@ /** * Corpus graph orchestrator. Owns filter + selection state, decides whether to * render the full graph or a focused node neighborhood (the Obsidian "local - * graph"), and wires the filter sidebar, canvas, and node detail panel. + * graph"), and wires the filter sidebar, canvas, ranking panel, and node panel. + * + * Analytics (PR B): when color-by=community or size-by=pagerank/betweenness, + * `metrics=true` is requested on the FULL graph (global importance). The + * neighborhood endpoint does not compute metrics — instead we overlay the + * cached full-graph metrics onto neighborhood nodes by id, so a node's + * PageRank doesn't change just because you zoomed into it. */ import { useEffect, useMemo, useState } from "react"; import { Button } from "@/components/ui/button"; +import { Card, CardContent } from "@/components/ui/card"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { type CorpusGraph, type GraphNode, @@ -20,7 +28,13 @@ import { type GraphControls, GraphFilterPanel, } from "@/components/graph/graph-filter-panel"; -import { GraphCanvas } from "@/components/graph/graph-canvas"; +import { + type ColorBy, + COMMUNITY_PALETTE, + GraphCanvas, + LEVEL_COLORS, + PA_COLORS, +} from "@/components/graph/graph-canvas"; import { GraphNodePanel } from "@/components/graph/graph-node-panel"; const NODE_LIMIT = 400; @@ -46,6 +60,8 @@ export function GraphView() { district: "", yearFrom: 0, yearTo: 0, + colorBy: "type", + sizeBy: "indegree", showTopics: true, showPracticeAreas: true, showHalachot: false, @@ -65,6 +81,10 @@ export function GraphView() { return t.join(","); }, [controls.showTopics, controls.showPracticeAreas, controls.showHalachot]); + // Metrics are needed when 
colouring by cluster or sizing by a centrality. + const metricsOn = + controls.sizeBy !== "indegree" || controls.colorBy === "community"; + const debouncedQ = useDebouncedValue(controls.q, 350); const filters = useMemo( @@ -81,6 +101,7 @@ export function GraphView() { district: controls.district, year_from: controls.yearFrom, year_to: controls.yearTo, + metrics: metricsOn, }), [ controls.practiceArea, @@ -92,19 +113,51 @@ export function GraphView() { controls.district, controls.yearFrom, controls.yearTo, + metricsOn, nodeTypes, debouncedQ, ], ); const isFocused = !!focusNodeId; - const full = useCorpusGraph(filters, !isFocused); + // Keep the full query alive when metrics are on so the overlay map stays warm. + const full = useCorpusGraph(filters, !isFocused || metricsOn); const neighborhood = useNodeNeighborhood(focusNodeId, 1, nodeTypes); const active = isFocused ? neighborhood : full; - const data: CorpusGraph | undefined = active.data; const error = active.error as Error | undefined; + // Cache of global metrics by node id, overlaid onto neighborhood nodes. + const metricsMap = useMemo(() => { + const m = new Map< + string, + { pagerank: number | null; betweenness: number | null; community: number | null } + >(); + for (const n of full.data?.nodes ?? []) { + if (n.pagerank != null || n.community != null) { + m.set(n.id, { + pagerank: n.pagerank, + betweenness: n.betweenness, + community: n.community, + }); + } + } + return m; + }, [full.data]); + + const data: CorpusGraph | undefined = useMemo(() => { + if (!isFocused) return full.data; + if (!neighborhood.data) return undefined; + if (metricsMap.size === 0) return neighborhood.data; + return { + ...neighborhood.data, + nodes: neighborhood.data.nodes.map((n) => { + const mv = metricsMap.get(n.id); + return mv ? 
{ ...n, ...mv } : n; + }), + }; + }, [isFocused, full.data, neighborhood.data, metricsMap]); + const handleNodeClick = (node: GraphNode) => { setSelectedNode(node); setFocusNodeId(node.id); @@ -115,13 +168,13 @@ export function GraphView() { setSelectedNode(null); }; + const showRanking = metricsOn && !selectedNode && (full.data?.nodes.length ?? 0) > 0; + return (
- {data - ? `${data.nodes.length} נקודות · ${data.edges.length} קשרים` - : "—"} + {data ? `${data.nodes.length} נקודות · ${data.edges.length} קשרים` : "—"} {!isFocused && full.data?.truncated && ( @@ -158,6 +211,8 @@ export function GraphView() { data={data} selectedId={selectedNode?.id ?? null} onNodeClick={handleNodeClick} + colorBy={controls.colorBy} + sizeBy={controls.sizeBy} /> )} @@ -172,23 +227,123 @@ export function GraphView() { )} - +
- {selectedNode && ( + {selectedNode ? ( setSelectedNode(null)} /> - )} + ) : showRanking ? ( + + ) : null}
); } -function Legend() { - const items = [ +function RankingPanel({ + nodes, + onPick, +}: { + nodes: GraphNode[]; + onPick: (n: GraphNode) => void; +}) { + const precedents = nodes.filter((n) => n.type === "precedent"); + const byPagerank = [...precedents] + .filter((n) => n.pagerank != null) + .sort((a, b) => (b.pagerank ?? 0) - (a.pagerank ?? 0)) + .slice(0, 12); + const byBetweenness = [...precedents] + .filter((n) => n.betweenness != null) + .sort((a, b) => (b.betweenness ?? 0) - (a.betweenness ?? 0)) + .slice(0, 12); + + return ( + + + + + + המשפיעות + + + גשרים + + + + + + + + + + + + ); +} + +function RankList({ + items, + metric, + onPick, +}: { + items: GraphNode[]; + metric: "pagerank" | "betweenness"; + onPick: (n: GraphNode) => void; +}) { + if (items.length === 0) { + return

אין נתונים.

; + } + return ( +
    + {items.map((n, i) => ( +
  1. + +
  2. + ))} +
+ ); +} + +const LEGENDS: Record = { + type: [ { color: "#1e3a5f", label: "פסיקה" }, { color: "#a97d3a", label: "נושא" }, { color: "#475569", label: "תחום" }, - ]; + ], + practice_area: [ + { color: PA_COLORS.rishuy_uvniya, label: "רישוי ובנייה" }, + { color: PA_COLORS.betterment_levy, label: "היטל השבחה" }, + { color: PA_COLORS.compensation_197, label: "פיצויים" }, + ], + precedent_level: [ + { color: LEVEL_COLORS["עליון"], label: "עליון" }, + { color: LEVEL_COLORS["מנהלי"], label: "מנהלי" }, + { color: LEVEL_COLORS["ועדת_ערר_מחוזית"], label: "ועדת ערר" }, + ], + community: [ + { color: COMMUNITY_PALETTE[0], label: "אשכול עיקרי" }, + { color: COMMUNITY_PALETTE[1], label: "אשכול נוסף" }, + { color: COMMUNITY_PALETTE[2], label: "אשכול נוסף" }, + ], + recency: [ + { color: "rgb(100,116,139)", label: "ישן (1994)" }, + { color: "rgb(169,125,58)", label: "עדכני (2026)" }, + ], +}; + +function Legend({ colorBy }: { colorBy: ColorBy }) { + const items = LEGENDS[colorBy] ?? LEGENDS.type; return (
{items.map((i) => ( diff --git a/web-ui/src/lib/api/graph.ts b/web-ui/src/lib/api/graph.ts index 8bc1f5a..4a95060 100644 --- a/web-ui/src/lib/api/graph.ts +++ b/web-ui/src/lib/api/graph.ts @@ -35,6 +35,9 @@ export type GraphNode = { court: string | null; date: string | null; // ISO date case_law_id: string | null; + pagerank: number | null; // normalized 0–1, only when metrics requested + betweenness: number | null; // normalized 0–1 + community: number | null; // dense cluster id, 0 = largest }; export type GraphFacets = { @@ -72,6 +75,7 @@ export type GraphFilters = { district?: string; year_from?: number; year_to?: number; + metrics?: boolean; }; export const graphKeys = { @@ -95,6 +99,7 @@ function buildParams(f: GraphFilters): string { if (f.district) p.set("district", f.district); if (f.year_from) p.set("year_from", String(f.year_from)); if (f.year_to) p.set("year_to", String(f.year_to)); + if (f.metrics) p.set("metrics", "true"); return p.toString(); } diff --git a/web/app.py b/web/app.py index d6d5db5..92475d8 100644 --- a/web/app.py +++ b/web/app.py @@ -5780,6 +5780,7 @@ async def graph_corpus( district: str = "", year_from: int = 0, year_to: int = 0, + metrics: bool = False, ): """Full corpus graph under the given filters (most-cited nodes survive the cap).""" if practice_area and practice_area not in _PRACTICE_AREAS: @@ -5799,6 +5800,7 @@ async def graph_corpus( district=district, year_from=year_from, year_to=year_to, + metrics=metrics, ) diff --git a/web/graph_api.py b/web/graph_api.py index 1917178..99ca117 100644 --- a/web/graph_api.py +++ b/web/graph_api.py @@ -35,6 +35,8 @@ from uuid import UUID import asyncpg from pydantic import BaseModel +from web import graph_metrics + # ── Node-type vocabulary ───────────────────────────────────────────── VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area"} DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area") @@ -63,6 +65,10 @@ class GraphNode(BaseModel): court: str | None = None # 
precedents only — for color-by / filter date: str | None = None # precedents only — ISO date, for recency color/filter case_law_id: str | None = None # canonical id for deep-link (precedents) + # Graph metrics — populated only when ``metrics=true`` (precedents only). + pagerank: float | None = None # normalized 0–1 (global influence) + betweenness: float | None = None # normalized 0–1 (bridge-ness) + community: int | None = None # dense cluster id, 0 = largest class GraphFacets(BaseModel): @@ -243,6 +249,7 @@ async def build_corpus_graph( district: str = "", year_from: int = 0, year_to: int = 0, + metrics: bool = False, ) -> CorpusGraph: """Assemble the full corpus graph under the given filters. @@ -250,6 +257,10 @@ async def build_corpus_graph( so clipping never hides the structurally important nodes. ``truncated`` + ``total_available`` let the UI prompt the user to narrow filters. All filters are applied server-side in the WHERE clause (G5). + + When ``metrics`` is true, PageRank / betweenness / community are computed + in-memory over the precedent citation subgraph (``graph_metrics``) and + stamped onto precedent nodes — no extra DB work (G2). 
""" types = normalize_node_types(node_types) cap = max(1, min(int(limit), NODE_CAP_MAX)) @@ -298,6 +309,9 @@ async def build_corpus_graph( hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types) nodes.extend(hub_nodes) + if metrics: + _stamp_metrics(nodes, edges) + return CorpusGraph( nodes=nodes, edges=edges, @@ -306,6 +320,23 @@ async def build_corpus_graph( ) +def _stamp_metrics(nodes: list[GraphNode], edges: list[GraphEdge]) -> None: + """Compute PageRank/betweenness/community over the precedent subgraph and + stamp them onto precedent nodes in place (hubs stay ``None``).""" + prec_ids = [n.id for n in nodes if n.type == "precedent"] + if not prec_ids: + return + directed = [(e.source, e.target) for e in edges if e.type == "cites"] + undirected = [(e.source, e.target) for e in edges if e.type == "same_chain"] + m = graph_metrics.compute(prec_ids, directed, undirected) + for n in nodes: + mv = m.get(n.id) + if mv: + n.pagerank = mv["pagerank"] + n.betweenness = mv["betweenness"] + n.community = mv["community"] + + async def build_node_neighborhood( pool: asyncpg.Pool, node_id: str, diff --git a/web/graph_metrics.py b/web/graph_metrics.py new file mode 100644 index 0000000..f00703f --- /dev/null +++ b/web/graph_metrics.py @@ -0,0 +1,158 @@ +"""Graph metrics for the corpus graph — dependency-free (no networkx). + +Computed in-memory over the precedent citation subgraph that ``graph_api`` +already fetched (**G2**: no DB access here — pure functions over data the caller +holds). The corpus graph is tiny (≤ ``NODE_CAP_MAX`` = 1500 nodes, sparse), so +power-iteration PageRank, Brandes betweenness, and label-propagation communities +all run synchronously well under a second. + +Edge model: ``cites`` is directional (authority flows citing → cited); +``same_chain`` is non-directional. PageRank uses cites-direction + same_chain +both ways; betweenness and communities treat the whole graph as undirected. 
+Determinism (stable colors across requests): nodes are processed in sorted +order and ties break by lowest label — no randomness. +""" +from __future__ import annotations + +from collections import Counter, defaultdict, deque + + +def compute( + node_ids: list[str], + directed_edges: list[tuple[str, str]], + undirected_edges: list[tuple[str, str]] | None = None, +) -> dict[str, dict]: + """Return ``{node_id: {pagerank, betweenness, community}}``. + + ``pagerank`` / ``betweenness`` are normalized to max = 1.0 (easy client + scaling) and rounded to 4 decimals; ``community`` is a dense int 0..k-1 ordered by descending cluster + size (so the largest cluster is always colour 0). + """ + nodes = list(node_ids) + node_set = set(nodes) + if not nodes: + return {} + undirected_edges = undirected_edges or [] + + de = [(s, t) for s, t in directed_edges if s in node_set and t in node_set and s != t] + ue = [(s, t) for s, t in undirected_edges if s in node_set and t in node_set and s != t] + + pr = _normalize(_pagerank(nodes, de, ue)) + bt = _normalize(_betweenness(nodes, de, ue)) + comm = _communities(nodes, de, ue) + + return { + n: { + "pagerank": round(pr[n], 4), + "betweenness": round(bt[n], 4), + "community": comm[n], + } + for n in nodes + } + + +def _normalize(d: dict[str, float]) -> dict[str, float]: + m = max(d.values()) if d else 0.0 + if m <= 0: + return {k: 0.0 for k in d} + return {k: v / m for k, v in d.items()} + + +def _undirected_adj(nodes: list[str], de, ue) -> dict[str, set[str]]: + adj: dict[str, set[str]] = {n: set() for n in nodes} + for s, t in de: + adj[s].add(t) + adj[t].add(s) + for s, t in ue: + adj[s].add(t) + adj[t].add(s) + return adj + + +def _pagerank(nodes, de, ue, d: float = 0.85, iters: int = 100, tol: float = 1e-9): + """Power-iteration PageRank. 
cites direction + same_chain both ways.""" + out: dict[str, list[str]] = defaultdict(list) + for s, t in de: + out[s].append(t) + for s, t in ue: + out[s].append(t) + out[t].append(s) + n = len(nodes) + pr = {x: 1.0 / n for x in nodes} + for _ in range(iters): + dangling = sum(pr[x] for x in nodes if not out[x]) + base = (1.0 - d) / n + d * dangling / n + new = {x: base for x in nodes} + for x in nodes: + deg = len(out[x]) + if deg: + share = d * pr[x] / deg + for m in out[x]: + new[m] += share + if sum(abs(new[x] - pr[x]) for x in nodes) < tol: + return new + pr = new + return pr + + +def _betweenness(nodes, de, ue): + """Brandes betweenness on the undirected graph. O(V·(V+E)).""" + adj = _undirected_adj(nodes, de, ue) + bc = {x: 0.0 for x in nodes} + for s in nodes: + stack: list[str] = [] + preds: dict[str, list[str]] = {w: [] for w in nodes} + sigma = {w: 0.0 for w in nodes} + sigma[s] = 1.0 + dist = {w: -1 for w in nodes} + dist[s] = 0 + queue = deque([s]) + while queue: + v = queue.popleft() + stack.append(v) + for w in adj[v]: + if dist[w] < 0: + dist[w] = dist[v] + 1 + queue.append(w) + if dist[w] == dist[v] + 1: + sigma[w] += sigma[v] + preds[w].append(v) + delta = {w: 0.0 for w in nodes} + while stack: + w = stack.pop() + for v in preds[w]: + if sigma[w]: + delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]) + if w != s: + bc[w] += delta[w] + # Undirected: each shortest path counted from both endpoints. + return {x: v / 2.0 for x, v in bc.items()} + + +def _communities(nodes, de, ue) -> dict[str, int]: + """Deterministic in-place (asynchronous-update) label propagation (+ dense renumbering). + + Each node starts in its own community and, visited in sorted order for at most 30 sweeps, adopts the most common + label among its neighbours (ties → lowest label), seeing labels already updated earlier in the same sweep. Isolated nodes keep their + own singleton community. Labels are renumbered 0..k-1 by descending size. 
+ """ + adj = _undirected_adj(nodes, de, ue) + order = sorted(nodes) + label = {n: n for n in nodes} + for _ in range(30): + changed = False + for n in order: + neigh = adj[n] + if not neigh: + continue + counts = Counter(label[m] for m in neigh) + best = min(counts.items(), key=lambda kv: (-kv[1], kv[0]))[0] + if label[n] != best: + label[n] = best + changed = True + if not changed: + break + sizes = Counter(label.values()) + ranked = [lab for lab, _ in sorted(sizes.items(), key=lambda kv: (-kv[1], kv[0]))] + remap = {lab: i for i, lab in enumerate(ranked)} + return {n: remap[label[n]] for n in nodes}