Merge pull request 'feat(graph): centrality + cluster analytics (corpus graph PR B)' (#129) from worktree-graph-analytics into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 49s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 49s
This commit was merged in pull request #129.
This commit is contained in:
@@ -37,6 +37,14 @@ type FGLink = {
|
||||
treatment: string | null;
|
||||
};
|
||||
|
||||
export type ColorBy =
|
||||
| "type"
|
||||
| "practice_area"
|
||||
| "community"
|
||||
| "precedent_level"
|
||||
| "recency";
|
||||
export type SizeBy = "indegree" | "pagerank" | "betweenness";
|
||||
|
||||
const NODE_COLORS: Record<string, string> = {
|
||||
precedent: "#1e3a5f", // navy
|
||||
halacha: "#b45309", // amber
|
||||
@@ -51,8 +59,66 @@ const TREATMENT_COLORS: Record<string, string> = {
|
||||
distinguished: "#d97706",
|
||||
};
|
||||
|
||||
function nodeRadius(n: GraphNode): number {
|
||||
// Distinct, cyclic palette for community (cluster) colouring.
|
||||
export const COMMUNITY_PALETTE = [
|
||||
"#1e3a5f", "#a97d3a", "#3b7a57", "#8c3b4a", "#5b4b8a", "#b06a2c",
|
||||
"#2f6f7a", "#9a4f6a", "#46688a", "#7d6b3a", "#6b7280", "#4d7c5a",
|
||||
];
|
||||
|
||||
// Authority hierarchy: עליון darkest → ועדת ערר lightest.
|
||||
export const LEVEL_COLORS: Record<string, string> = {
|
||||
עליון: "#13294b",
|
||||
מנהלי: "#3b6ea5",
|
||||
ועדת_ערר_מחוזית: "#8fb0cf",
|
||||
};
|
||||
|
||||
export const PA_COLORS: Record<string, string> = {
|
||||
rishuy_uvniya: "#1e3a5f",
|
||||
betterment_levy: "#a97d3a",
|
||||
compensation_197: "#3b7a57",
|
||||
};
|
||||
|
||||
const FALLBACK_COLOR = "#94a3b8";
|
||||
|
||||
/**
 * Map a decision date onto the old (slate) → recent (gold) gradient over
 * 1994–2026.
 *
 * Only the first four characters of `dateStr` are read (the year of an ISO
 * date). Years outside the range are clamped to the nearest end of the
 * gradient.
 *
 * @param dateStr ISO date string, or `null` when the node has no date.
 * @returns A CSS `rgb(r,g,b)` string, or `FALLBACK_COLOR` when the date is
 *   missing or its year does not parse to a non-zero number.
 */
export function recencyColor(dateStr: string | null): string {
  const OLDEST_YEAR = 1994;
  const NEWEST_YEAR = 2026;

  if (!dateStr) return FALLBACK_COLOR;
  const year = Number(dateStr.slice(0, 4));
  // NaN (unparseable) and 0 are both treated as "no usable year".
  if (!year) return FALLBACK_COLOR;

  // Position on the gradient, clamped to [0, 1].
  const t = Math.max(
    0,
    Math.min(1, (year - OLDEST_YEAR) / (NEWEST_YEAR - OLDEST_YEAR)),
  );
  // Per-channel interpolation; avoids indexing into parallel arrays, which is
  // `number | undefined` under `noUncheckedIndexedAccess`.
  const lerp = (from: number, to: number): number =>
    Math.round(from + (to - from) * t);

  // slate rgb(100,116,139) → gold rgb(169,125,58)
  return `rgb(${lerp(100, 169)},${lerp(116, 125)},${lerp(139, 58)})`;
}
|
||||
|
||||
/**
 * Resolve the fill colour for a node under the selected colour mode.
 *
 * Non-precedent nodes (hubs) always keep their type colour; only precedents
 * are recoloured by `colorBy`. Any lookup that misses resolves to
 * `FALLBACK_COLOR`.
 *
 * @param n Node to colour.
 * @param colorBy Active colour mode.
 * @returns A CSS colour string.
 */
export function colorForNode(n: GraphNode, colorBy: ColorBy): string {
  // Hubs always keep their type colour — only precedents recolour.
  if (n.type !== "precedent") return NODE_COLORS[n.type] ?? FALLBACK_COLOR;

  switch (colorBy) {
    case "practice_area":
      return PA_COLORS[n.practice_area ?? ""] ?? FALLBACK_COLOR;
    case "community": {
      const id = n.community;
      // A negative or fractional id would index outside the palette and
      // yield `undefined`; treat it like a missing cluster.
      if (id == null || !Number.isInteger(id) || id < 0) return FALLBACK_COLOR;
      // The palette is cyclic: ids beyond its length wrap around.
      return COMMUNITY_PALETTE[id % COMMUNITY_PALETTE.length] ?? FALLBACK_COLOR;
    }
    case "precedent_level":
      return LEVEL_COLORS[n.precedent_level ?? ""] ?? FALLBACK_COLOR;
    case "recency":
      return recencyColor(n.date);
    default:
      // "type" mode (and anything unrecognised): plain precedent colour.
      return NODE_COLORS.precedent ?? FALLBACK_COLOR;
  }
}
|
||||
|
||||
/**
 * Compute the drawn radius of a node for the selected sizing mode.
 *
 * - `topic` and `practice_area` nodes have a fixed radius of 5.
 * - With `sizeBy` = `"pagerank"` / `"betweenness"`, the radius is
 *   `3 + sqrt(metric) * 18`, with the metric clamped to [0, 1] so an
 *   out-of-range value can never produce a NaN or oversized radius.
 * - Otherwise (in-degree mode, or the requested metric is missing or
 *   non-finite) the radius is derived from `n.size` and capped at 22.
 *
 * @param n Node to size.
 * @param sizeBy Active sizing mode.
 * @returns Radius in canvas units; always a finite, non-negative number.
 */
export function radiusForNode(n: GraphNode, sizeBy: SizeBy): number {
  if (n.type === "topic" || n.type === "practice_area") return 5;

  const metric =
    sizeBy === "pagerank"
      ? n.pagerank
      : sizeBy === "betweenness"
        ? n.betweenness
        : null;

  if (metric != null && Number.isFinite(metric)) {
    const clamped = Math.max(0, Math.min(1, metric));
    return 3 + Math.sqrt(clamped) * 18;
  }

  // In-degree sizing; a non-finite size is treated as 0 rather than leaking NaN.
  const degree = Number.isFinite(n.size) ? Math.max(0, n.size) : 0;
  return Math.min(22, 3 + Math.sqrt(degree) * 1.7);
}
|
||||
|
||||
@@ -76,10 +142,14 @@ export function GraphCanvas({
|
||||
data,
|
||||
selectedId,
|
||||
onNodeClick,
|
||||
colorBy = "type",
|
||||
sizeBy = "indegree",
|
||||
}: {
|
||||
data: CorpusGraph | undefined;
|
||||
selectedId: string | null;
|
||||
onNodeClick: (node: GraphNode) => void;
|
||||
colorBy?: ColorBy;
|
||||
sizeBy?: SizeBy;
|
||||
}) {
|
||||
const { ref, size } = useElementSize<HTMLDivElement>();
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
@@ -143,9 +213,9 @@ export function GraphCanvas({
|
||||
|
||||
const drawNode = useCallback(
|
||||
(node: FGNode, ctx: CanvasRenderingContext2D, globalScale: number) => {
|
||||
const r = nodeRadius(node);
|
||||
const r = radiusForNode(node, sizeBy);
|
||||
const dimmed = isDimmed(node.id);
|
||||
const color = NODE_COLORS[node.type] ?? "#64748b";
|
||||
const color = colorForNode(node, colorBy);
|
||||
ctx.globalAlpha = dimmed ? 0.18 : 1;
|
||||
|
||||
ctx.beginPath();
|
||||
@@ -194,17 +264,17 @@ export function GraphCanvas({
|
||||
}
|
||||
ctx.globalAlpha = 1;
|
||||
},
|
||||
[activeId, isDimmed],
|
||||
[activeId, isDimmed, colorBy, sizeBy],
|
||||
);
|
||||
|
||||
const drawPointerArea = useCallback(
|
||||
(node: FGNode, color: string, ctx: CanvasRenderingContext2D) => {
|
||||
ctx.fillStyle = color;
|
||||
ctx.beginPath();
|
||||
ctx.arc(node.x ?? 0, node.y ?? 0, nodeRadius(node) + 2, 0, 2 * Math.PI);
|
||||
ctx.arc(node.x ?? 0, node.y ?? 0, radiusForNode(node, sizeBy) + 2, 0, 2 * Math.PI);
|
||||
ctx.fill();
|
||||
},
|
||||
[],
|
||||
[sizeBy],
|
||||
);
|
||||
|
||||
const linkColor = useCallback(
|
||||
|
||||
@@ -25,6 +25,7 @@ import {
|
||||
SelectValue,
|
||||
} from "@/components/ui/select";
|
||||
import type { GraphFacets } from "@/lib/api/graph";
|
||||
import type { ColorBy, SizeBy } from "@/components/graph/graph-canvas";
|
||||
|
||||
export type GraphControls = {
|
||||
practiceArea: string;
|
||||
@@ -37,6 +38,8 @@ export type GraphControls = {
|
||||
district: string;
|
||||
yearFrom: number;
|
||||
yearTo: number;
|
||||
colorBy: ColorBy;
|
||||
sizeBy: SizeBy;
|
||||
showTopics: boolean;
|
||||
showPracticeAreas: boolean;
|
||||
showHalachot: boolean;
|
||||
@@ -45,6 +48,20 @@ export type GraphControls = {
|
||||
const ALL = "__all__";
|
||||
const YEARS = Array.from({ length: 2026 - 1994 + 1 }, (_, i) => 2026 - i);
|
||||
|
||||
const COLOR_BY: { value: ColorBy; label: string }[] = [
|
||||
{ value: "type", label: "סוג נקודה" },
|
||||
{ value: "practice_area", label: "תחום" },
|
||||
{ value: "precedent_level", label: "דרגת סמכות" },
|
||||
{ value: "community", label: "אשכול (זיהוי אוטומטי)" },
|
||||
{ value: "recency", label: "עדכניות" },
|
||||
];
|
||||
|
||||
const SIZE_BY: { value: SizeBy; label: string }[] = [
|
||||
{ value: "indegree", label: "ציטוטים נכנסים" },
|
||||
{ value: "pagerank", label: "השפעה (PageRank)" },
|
||||
{ value: "betweenness", label: "גשריות (Betweenness)" },
|
||||
];
|
||||
|
||||
const PRACTICE_AREAS: { value: string; label: string }[] = [
|
||||
{ value: "rishuy_uvniya", label: "רישוי ובנייה" },
|
||||
{ value: "betterment_levy", label: "היטל השבחה" },
|
||||
@@ -142,6 +159,46 @@ export function GraphFilterPanel({
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="space-y-1.5">
|
||||
<Label className="text-xs text-ink-muted">צביעה לפי</Label>
|
||||
<Select
|
||||
value={controls.colorBy}
|
||||
onValueChange={(v) => onChange({ colorBy: v as GraphControls["colorBy"] })}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{COLOR_BY.map((c) => (
|
||||
<SelectItem key={c.value} value={c.value}>
|
||||
{c.label}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
<div className="space-y-1.5">
|
||||
<Label className="text-xs text-ink-muted">גודל נקודה לפי</Label>
|
||||
<Select
|
||||
value={controls.sizeBy}
|
||||
onValueChange={(v) => onChange({ sizeBy: v as GraphControls["sizeBy"] })}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{SIZE_BY.map((s) => (
|
||||
<SelectItem key={s.value} value={s.value}>
|
||||
{s.label}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
<Accordion type="single" collapsible className="border-0">
|
||||
<AccordionItem value="advanced" className="border-0">
|
||||
<AccordionTrigger className="py-1 text-xs text-ink-muted hover:no-underline">
|
||||
|
||||
@@ -3,12 +3,20 @@
|
||||
/**
|
||||
* Corpus graph orchestrator. Owns filter + selection state, decides whether to
|
||||
* render the full graph or a focused node neighborhood (the Obsidian "local
|
||||
* graph"), and wires the filter sidebar, canvas, and node detail panel.
|
||||
* graph"), and wires the filter sidebar, canvas, ranking panel, and node panel.
|
||||
*
|
||||
* Analytics (PR B): when color-by=community or size-by=pagerank/betweenness,
|
||||
* `metrics=true` is requested on the FULL graph (global importance). The
|
||||
* neighborhood endpoint does not compute metrics — instead we overlay the
|
||||
* cached full-graph metrics onto neighborhood nodes by id, so a node's
|
||||
* PageRank doesn't change just because you zoomed into it.
|
||||
*/
|
||||
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
|
||||
import {
|
||||
type CorpusGraph,
|
||||
type GraphNode,
|
||||
@@ -20,7 +28,13 @@ import {
|
||||
type GraphControls,
|
||||
GraphFilterPanel,
|
||||
} from "@/components/graph/graph-filter-panel";
|
||||
import { GraphCanvas } from "@/components/graph/graph-canvas";
|
||||
import {
|
||||
type ColorBy,
|
||||
COMMUNITY_PALETTE,
|
||||
GraphCanvas,
|
||||
LEVEL_COLORS,
|
||||
PA_COLORS,
|
||||
} from "@/components/graph/graph-canvas";
|
||||
import { GraphNodePanel } from "@/components/graph/graph-node-panel";
|
||||
|
||||
const NODE_LIMIT = 400;
|
||||
@@ -46,6 +60,8 @@ export function GraphView() {
|
||||
district: "",
|
||||
yearFrom: 0,
|
||||
yearTo: 0,
|
||||
colorBy: "type",
|
||||
sizeBy: "indegree",
|
||||
showTopics: true,
|
||||
showPracticeAreas: true,
|
||||
showHalachot: false,
|
||||
@@ -65,6 +81,10 @@ export function GraphView() {
|
||||
return t.join(",");
|
||||
}, [controls.showTopics, controls.showPracticeAreas, controls.showHalachot]);
|
||||
|
||||
// Metrics are needed when colouring by cluster or sizing by a centrality.
|
||||
const metricsOn =
|
||||
controls.sizeBy !== "indegree" || controls.colorBy === "community";
|
||||
|
||||
const debouncedQ = useDebouncedValue(controls.q, 350);
|
||||
|
||||
const filters = useMemo(
|
||||
@@ -81,6 +101,7 @@ export function GraphView() {
|
||||
district: controls.district,
|
||||
year_from: controls.yearFrom,
|
||||
year_to: controls.yearTo,
|
||||
metrics: metricsOn,
|
||||
}),
|
||||
[
|
||||
controls.practiceArea,
|
||||
@@ -92,19 +113,51 @@ export function GraphView() {
|
||||
controls.district,
|
||||
controls.yearFrom,
|
||||
controls.yearTo,
|
||||
metricsOn,
|
||||
nodeTypes,
|
||||
debouncedQ,
|
||||
],
|
||||
);
|
||||
|
||||
const isFocused = !!focusNodeId;
|
||||
const full = useCorpusGraph(filters, !isFocused);
|
||||
// Keep the full query alive when metrics are on so the overlay map stays warm.
|
||||
const full = useCorpusGraph(filters, !isFocused || metricsOn);
|
||||
const neighborhood = useNodeNeighborhood(focusNodeId, 1, nodeTypes);
|
||||
|
||||
const active = isFocused ? neighborhood : full;
|
||||
const data: CorpusGraph | undefined = active.data;
|
||||
const error = active.error as Error | undefined;
|
||||
|
||||
// Cache of global metrics by node id, overlaid onto neighborhood nodes.
|
||||
const metricsMap = useMemo(() => {
|
||||
const m = new Map<
|
||||
string,
|
||||
{ pagerank: number | null; betweenness: number | null; community: number | null }
|
||||
>();
|
||||
for (const n of full.data?.nodes ?? []) {
|
||||
if (n.pagerank != null || n.community != null) {
|
||||
m.set(n.id, {
|
||||
pagerank: n.pagerank,
|
||||
betweenness: n.betweenness,
|
||||
community: n.community,
|
||||
});
|
||||
}
|
||||
}
|
||||
return m;
|
||||
}, [full.data]);
|
||||
|
||||
const data: CorpusGraph | undefined = useMemo(() => {
|
||||
if (!isFocused) return full.data;
|
||||
if (!neighborhood.data) return undefined;
|
||||
if (metricsMap.size === 0) return neighborhood.data;
|
||||
return {
|
||||
...neighborhood.data,
|
||||
nodes: neighborhood.data.nodes.map((n) => {
|
||||
const mv = metricsMap.get(n.id);
|
||||
return mv ? { ...n, ...mv } : n;
|
||||
}),
|
||||
};
|
||||
}, [isFocused, full.data, neighborhood.data, metricsMap]);
|
||||
|
||||
const handleNodeClick = (node: GraphNode) => {
|
||||
setSelectedNode(node);
|
||||
setFocusNodeId(node.id);
|
||||
@@ -115,13 +168,13 @@ export function GraphView() {
|
||||
setSelectedNode(null);
|
||||
};
|
||||
|
||||
const showRanking = metricsOn && !selectedNode && (full.data?.nodes.length ?? 0) > 0;
|
||||
|
||||
return (
|
||||
<div className="space-y-3">
|
||||
<div className="flex items-center justify-between gap-3 text-xs text-ink-muted">
|
||||
<span>
|
||||
{data
|
||||
? `${data.nodes.length} נקודות · ${data.edges.length} קשרים`
|
||||
: "—"}
|
||||
{data ? `${data.nodes.length} נקודות · ${data.edges.length} קשרים` : "—"}
|
||||
</span>
|
||||
{!isFocused && full.data?.truncated && (
|
||||
<span className="text-gold-deep">
|
||||
@@ -158,6 +211,8 @@ export function GraphView() {
|
||||
data={data}
|
||||
selectedId={selectedNode?.id ?? null}
|
||||
onNodeClick={handleNodeClick}
|
||||
colorBy={controls.colorBy}
|
||||
sizeBy={controls.sizeBy}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -172,23 +227,123 @@ export function GraphView() {
|
||||
</Button>
|
||||
)}
|
||||
|
||||
<Legend />
|
||||
<Legend colorBy={controls.colorBy} />
|
||||
</div>
|
||||
|
||||
{selectedNode && (
|
||||
{selectedNode ? (
|
||||
<GraphNodePanel node={selectedNode} onClose={() => setSelectedNode(null)} />
|
||||
)}
|
||||
) : showRanking ? (
|
||||
<RankingPanel nodes={full.data!.nodes} onPick={handleNodeClick} />
|
||||
) : null}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function Legend() {
|
||||
const items = [
|
||||
/**
 * Return the `limit` precedent nodes with the highest non-null `metric`,
 * best first. The input array is not mutated (`filter` yields a fresh array
 * before sorting).
 */
function topPrecedentsBy(
  nodes: GraphNode[],
  metric: "pagerank" | "betweenness",
  limit = 12,
): GraphNode[] {
  return nodes
    .filter((n) => n.type === "precedent" && n[metric] != null)
    .sort((a, b) => (b[metric] ?? 0) - (a[metric] ?? 0))
    .slice(0, limit);
}

/**
 * Side panel with two tabs listing the top-12 precedents by PageRank and by
 * betweenness. Choosing an entry calls `onPick` with that node.
 */
function RankingPanel({
  nodes,
  onPick,
}: {
  nodes: GraphNode[];
  onPick: (n: GraphNode) => void;
}) {
  const byPagerank = topPrecedentsBy(nodes, "pagerank");
  const byBetweenness = topPrecedentsBy(nodes, "betweenness");

  return (
    <Card className="bg-surface border-rule shadow-sm w-72 shrink-0 overflow-y-auto">
      <CardContent className="p-4">
        <Tabs defaultValue="pagerank">
          <TabsList className="w-full">
            <TabsTrigger value="pagerank" className="flex-1">
              המשפיעות
            </TabsTrigger>
            <TabsTrigger value="betweenness" className="flex-1">
              גשרים
            </TabsTrigger>
          </TabsList>
          <TabsContent value="pagerank">
            <RankList items={byPagerank} metric="pagerank" onPick={onPick} />
          </TabsContent>
          <TabsContent value="betweenness">
            <RankList items={byBetweenness} metric="betweenness" onPick={onPick} />
          </TabsContent>
        </Tabs>
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
/**
 * Ordered list of ranked nodes. Each row is a button showing the 1-based
 * position, the node label, and the chosen metric scaled by 100 and rounded
 * to a whole number. An empty list renders a short placeholder instead.
 */
function RankList({
  items,
  metric,
  onPick,
}: {
  items: GraphNode[];
  metric: "pagerank" | "betweenness";
  onPick: (n: GraphNode) => void;
}) {
  if (!items.length) {
    return <p className="text-ink-muted text-xs mt-3">אין נתונים.</p>;
  }

  const rows = items.map((node, index) => {
    // A missing metric is displayed as 0.
    const score = ((node[metric] ?? 0) * 100).toFixed(0);
    return (
      <li key={node.id}>
        <button
          type="button"
          onClick={() => onPick(node)}
          className="flex w-full items-baseline justify-between gap-2 rounded px-2 py-1 text-start text-sm hover:bg-gold-wash"
        >
          <span className="truncate">
            <span className="text-ink-muted text-xs">{index + 1}.</span> {node.label}
          </span>
          <span className="text-ink-muted text-xs tabular-nums shrink-0">{score}</span>
        </button>
      </li>
    );
  });

  return <ol className="mt-2 space-y-1">{rows}</ol>;
}
|
||||
|
||||
const LEGENDS: Record<ColorBy, { color: string; label: string }[]> = {
|
||||
type: [
|
||||
{ color: "#1e3a5f", label: "פסיקה" },
|
||||
{ color: "#a97d3a", label: "נושא" },
|
||||
{ color: "#475569", label: "תחום" },
|
||||
];
|
||||
],
|
||||
practice_area: [
|
||||
{ color: PA_COLORS.rishuy_uvniya, label: "רישוי ובנייה" },
|
||||
{ color: PA_COLORS.betterment_levy, label: "היטל השבחה" },
|
||||
{ color: PA_COLORS.compensation_197, label: "פיצויים" },
|
||||
],
|
||||
precedent_level: [
|
||||
{ color: LEVEL_COLORS["עליון"], label: "עליון" },
|
||||
{ color: LEVEL_COLORS["מנהלי"], label: "מנהלי" },
|
||||
{ color: LEVEL_COLORS["ועדת_ערר_מחוזית"], label: "ועדת ערר" },
|
||||
],
|
||||
community: [
|
||||
{ color: COMMUNITY_PALETTE[0], label: "אשכול עיקרי" },
|
||||
{ color: COMMUNITY_PALETTE[1], label: "אשכול נוסף" },
|
||||
{ color: COMMUNITY_PALETTE[2], label: "אשכול נוסף" },
|
||||
],
|
||||
recency: [
|
||||
{ color: "rgb(100,116,139)", label: "ישן (1994)" },
|
||||
{ color: "rgb(169,125,58)", label: "עדכני (2026)" },
|
||||
],
|
||||
};
|
||||
|
||||
function Legend({ colorBy }: { colorBy: ColorBy }) {
|
||||
const items = LEGENDS[colorBy] ?? LEGENDS.type;
|
||||
return (
|
||||
<div className="absolute bottom-3 end-3 flex flex-col gap-1 rounded-md bg-surface/85 backdrop-blur px-3 py-2 text-xs text-ink-muted">
|
||||
{items.map((i) => (
|
||||
|
||||
@@ -35,6 +35,9 @@ export type GraphNode = {
|
||||
court: string | null;
|
||||
date: string | null; // ISO date
|
||||
case_law_id: string | null;
|
||||
pagerank: number | null; // normalized 0–1, only when metrics requested
|
||||
betweenness: number | null; // normalized 0–1
|
||||
community: number | null; // dense cluster id, 0 = largest
|
||||
};
|
||||
|
||||
export type GraphFacets = {
|
||||
@@ -72,6 +75,7 @@ export type GraphFilters = {
|
||||
district?: string;
|
||||
year_from?: number;
|
||||
year_to?: number;
|
||||
metrics?: boolean;
|
||||
};
|
||||
|
||||
export const graphKeys = {
|
||||
@@ -95,6 +99,7 @@ function buildParams(f: GraphFilters): string {
|
||||
if (f.district) p.set("district", f.district);
|
||||
if (f.year_from) p.set("year_from", String(f.year_from));
|
||||
if (f.year_to) p.set("year_to", String(f.year_to));
|
||||
if (f.metrics) p.set("metrics", "true");
|
||||
return p.toString();
|
||||
}
|
||||
|
||||
|
||||
@@ -5780,6 +5780,7 @@ async def graph_corpus(
|
||||
district: str = "",
|
||||
year_from: int = 0,
|
||||
year_to: int = 0,
|
||||
metrics: bool = False,
|
||||
):
|
||||
"""Full corpus graph under the given filters (most-cited nodes survive the cap)."""
|
||||
if practice_area and practice_area not in _PRACTICE_AREAS:
|
||||
@@ -5799,6 +5800,7 @@ async def graph_corpus(
|
||||
district=district,
|
||||
year_from=year_from,
|
||||
year_to=year_to,
|
||||
metrics=metrics,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -35,6 +35,8 @@ from uuid import UUID
|
||||
import asyncpg
|
||||
from pydantic import BaseModel
|
||||
|
||||
from web import graph_metrics
|
||||
|
||||
# ── Node-type vocabulary ─────────────────────────────────────────────
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area"}
|
||||
DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
|
||||
@@ -63,6 +65,10 @@ class GraphNode(BaseModel):
|
||||
court: str | None = None # precedents only — for color-by / filter
|
||||
date: str | None = None # precedents only — ISO date, for recency color/filter
|
||||
case_law_id: str | None = None # canonical id for deep-link (precedents)
|
||||
# Graph metrics — populated only when ``metrics=true`` (precedents only).
|
||||
pagerank: float | None = None # normalized 0–1 (global influence)
|
||||
betweenness: float | None = None # normalized 0–1 (bridge-ness)
|
||||
community: int | None = None # dense cluster id, 0 = largest
|
||||
|
||||
|
||||
class GraphFacets(BaseModel):
|
||||
@@ -243,6 +249,7 @@ async def build_corpus_graph(
|
||||
district: str = "",
|
||||
year_from: int = 0,
|
||||
year_to: int = 0,
|
||||
metrics: bool = False,
|
||||
) -> CorpusGraph:
|
||||
"""Assemble the full corpus graph under the given filters.
|
||||
|
||||
@@ -250,6 +257,10 @@ async def build_corpus_graph(
|
||||
so clipping never hides the structurally important nodes. ``truncated`` +
|
||||
``total_available`` let the UI prompt the user to narrow filters. All
|
||||
filters are applied server-side in the WHERE clause (G5).
|
||||
|
||||
When ``metrics`` is true, PageRank / betweenness / community are computed
|
||||
in-memory over the precedent citation subgraph (``graph_metrics``) and
|
||||
stamped onto precedent nodes — no extra DB work (G2).
|
||||
"""
|
||||
types = normalize_node_types(node_types)
|
||||
cap = max(1, min(int(limit), NODE_CAP_MAX))
|
||||
@@ -298,6 +309,9 @@ async def build_corpus_graph(
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
|
||||
nodes.extend(hub_nodes)
|
||||
|
||||
if metrics:
|
||||
_stamp_metrics(nodes, edges)
|
||||
|
||||
return CorpusGraph(
|
||||
nodes=nodes,
|
||||
edges=edges,
|
||||
@@ -306,6 +320,23 @@ async def build_corpus_graph(
|
||||
)
|
||||
|
||||
|
||||
def _stamp_metrics(nodes: list[GraphNode], edges: list[GraphEdge]) -> None:
    """Attach graph metrics to precedent nodes, mutating ``nodes`` in place.

    Collects the ids of all ``precedent`` nodes, splits the edges into
    ``cites`` pairs (passed as directed) and ``same_chain`` pairs (passed as
    undirected), and hands them to ``graph_metrics.compute``. Every node that
    has an entry in the result gets its ``pagerank``, ``betweenness`` and
    ``community`` set; other nodes are left untouched. Does nothing when there
    are no precedent nodes.
    """
    precedent_ids = [node.id for node in nodes if node.type == "precedent"]
    if len(precedent_ids) == 0:
        return

    cites: list[tuple] = []
    chains: list[tuple] = []
    for edge in edges:
        if edge.type == "cites":
            cites.append((edge.source, edge.target))
    for edge in edges:
        if edge.type == "same_chain":
            chains.append((edge.source, edge.target))

    computed = graph_metrics.compute(precedent_ids, cites, chains)

    for node in nodes:
        values = computed.get(node.id)
        if not values:
            continue
        node.pagerank = values["pagerank"]
        node.betweenness = values["betweenness"]
        node.community = values["community"]
|
||||
|
||||
|
||||
async def build_node_neighborhood(
|
||||
pool: asyncpg.Pool,
|
||||
node_id: str,
|
||||
|
||||
158
web/graph_metrics.py
Normal file
158
web/graph_metrics.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Graph metrics for the corpus graph — dependency-free (no networkx).
|
||||
|
||||
Computed in-memory over the precedent citation subgraph that ``graph_api``
|
||||
already fetched (**G2**: no DB access here — pure functions over data the caller
|
||||
holds). The corpus graph is tiny (≤ ``NODE_CAP_MAX`` = 1500 nodes, sparse), so
|
||||
power-iteration PageRank, Brandes betweenness, and label-propagation communities
|
||||
all run synchronously well under a second.
|
||||
|
||||
Edge model: ``cites`` is directional (authority flows citing → cited);
|
||||
``same_chain`` is non-directional. PageRank uses cites-direction + same_chain
|
||||
both ways; betweenness and communities treat the whole graph as undirected.
|
||||
Determinism (stable colors across requests): nodes are processed in sorted
|
||||
order and ties break by lowest label — no randomness.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict, deque
|
||||
|
||||
|
||||
def compute(
    node_ids: list[str],
    directed_edges: list[tuple[str, str]],
    undirected_edges: list[tuple[str, str]] | None = None,
) -> dict[str, dict]:
    """Return ``{node_id: {pagerank, betweenness, community}}``.

    ``pagerank`` / ``betweenness`` are normalized to max = 1.0 (easy client
    scaling) and rounded to 4 decimals; ``community`` is a dense int 0..k-1
    ordered by descending cluster size (so the largest cluster is always
    colour 0).

    Args:
        node_ids: Ids of the nodes to score. Repeated ids are collapsed to
            their first occurrence.
        directed_edges: ``(source, target)`` pairs treated as directional.
        undirected_edges: ``(a, b)`` pairs treated as non-directional;
            ``None`` means no such edges.

    Returns:
        One entry per distinct node id; an empty dict when ``node_ids`` is
        empty. Edges with an endpoint outside ``node_ids``, and self-loops,
        are ignored.
    """
    # De-duplicate while keeping first-seen order: a repeated id would be
    # visited twice by every per-node loop below and skew the scores.
    nodes = list(dict.fromkeys(node_ids))
    if not nodes:
        return {}
    node_set = set(nodes)
    undirected_edges = undirected_edges or []

    def _usable(pairs):
        # Keep only edges between known, distinct nodes.
        return [(s, t) for s, t in pairs if s in node_set and t in node_set and s != t]

    de = _usable(directed_edges)
    ue = _usable(undirected_edges)

    pr = _normalize(_pagerank(nodes, de, ue))
    bt = _normalize(_betweenness(nodes, de, ue))
    comm = _communities(nodes, de, ue)

    return {
        n: {
            "pagerank": round(pr[n], 4),
            "betweenness": round(bt[n], 4),
            "community": comm[n],
        }
        for n in nodes
    }
|
||||
|
||||
|
||||
def _normalize(d: dict[str, float]) -> dict[str, float]:
|
||||
m = max(d.values()) if d else 0.0
|
||||
if m <= 0:
|
||||
return {k: 0.0 for k in d}
|
||||
return {k: v / m for k, v in d.items()}
|
||||
|
||||
|
||||
def _undirected_adj(nodes: list[str], de, ue) -> dict[str, set[str]]:
|
||||
adj: dict[str, set[str]] = {n: set() for n in nodes}
|
||||
for s, t in de:
|
||||
adj[s].add(t)
|
||||
adj[t].add(s)
|
||||
for s, t in ue:
|
||||
adj[s].add(t)
|
||||
adj[t].add(s)
|
||||
return adj
|
||||
|
||||
|
||||
def _pagerank(nodes, de, ue, d: float = 0.85, iters: int = 100, tol: float = 1e-9):
|
||||
"""Power-iteration PageRank. cites direction + same_chain both ways."""
|
||||
out: dict[str, list[str]] = defaultdict(list)
|
||||
for s, t in de:
|
||||
out[s].append(t)
|
||||
for s, t in ue:
|
||||
out[s].append(t)
|
||||
out[t].append(s)
|
||||
n = len(nodes)
|
||||
pr = {x: 1.0 / n for x in nodes}
|
||||
for _ in range(iters):
|
||||
dangling = sum(pr[x] for x in nodes if not out[x])
|
||||
base = (1.0 - d) / n + d * dangling / n
|
||||
new = {x: base for x in nodes}
|
||||
for x in nodes:
|
||||
deg = len(out[x])
|
||||
if deg:
|
||||
share = d * pr[x] / deg
|
||||
for m in out[x]:
|
||||
new[m] += share
|
||||
if sum(abs(new[x] - pr[x]) for x in nodes) < tol:
|
||||
return new
|
||||
pr = new
|
||||
return pr
|
||||
|
||||
|
||||
def _betweenness(nodes, de, ue):
    """Brandes betweenness on the undirected graph. O(V·(V+E)).

    For every source node a breadth-first search counts shortest paths to the
    other nodes and records each node's predecessors on those paths; the
    dependencies are then accumulated in reverse visiting order. Raw totals
    are halved before returning.
    """
    neighbours = _undirected_adj(nodes, de, ue)
    score = dict.fromkeys(nodes, 0.0)

    for source in nodes:
        # --- forward phase: BFS with shortest-path counting ---------------
        n_paths = dict.fromkeys(nodes, 0.0)
        n_paths[source] = 1.0
        depth = dict.fromkeys(nodes, -1)
        depth[source] = 0
        parents: dict[str, list[str]] = {w: [] for w in nodes}
        visit_order: list[str] = []
        frontier = deque((source,))
        while frontier:
            v = frontier.popleft()
            visit_order.append(v)
            next_depth = depth[v] + 1
            for w in neighbours[v]:
                if depth[w] < 0:
                    # First time w is reached.
                    depth[w] = next_depth
                    frontier.append(w)
                if depth[w] == next_depth:
                    # v lies on a shortest path to w.
                    n_paths[w] += n_paths[v]
                    parents[w].append(v)

        # --- backward phase: accumulate dependencies, farthest first ------
        dependency = dict.fromkeys(nodes, 0.0)
        for w in reversed(visit_order):
            for v in parents[w]:
                if n_paths[w]:
                    dependency[v] += (n_paths[v] / n_paths[w]) * (1.0 + dependency[w])
            if w != source:
                score[w] += dependency[w]

    # Undirected: each shortest path counted from both endpoints.
    return {x: total / 2.0 for x, total in score.items()}
|
||||
|
||||
|
||||
def _communities(nodes, de, ue, max_sweeps: int = 30) -> dict[str, int]:
    """Deterministic label propagation (+ dense renumbering).

    Each node starts in its own community. Nodes are then swept in sorted
    order, and each adopts the most common label among its neighbours
    (ties → lowest label). Updates are applied in place, so a node sees the
    labels its earlier neighbours took in the same sweep (asynchronous
    propagation, not synchronous). Sweeping stops when a full pass changes
    nothing or after ``max_sweeps`` passes. Isolated nodes keep their own
    singleton community.

    Args:
        nodes: Node ids to cluster.
        de: Directed pairs; direction is ignored here.
        ue: Undirected pairs.
        max_sweeps: Upper bound on the number of passes over the nodes.

    Returns:
        ``{node: community}`` with communities renumbered 0..k-1 by
        descending size (ties → lowest original label).
    """
    adj = _undirected_adj(nodes, de, ue)
    order = sorted(nodes)
    label = {n: n for n in nodes}
    for _ in range(max_sweeps):
        changed = False
        for n in order:
            neigh = adj[n]
            if not neigh:
                continue
            counts = Counter(label[m] for m in neigh)
            # Highest count wins; equal counts resolve to the smallest label.
            best = min(counts.items(), key=lambda kv: (-kv[1], kv[0]))[0]
            if label[n] != best:
                label[n] = best
                changed = True
        if not changed:
            break
    sizes = Counter(label.values())
    ranked = [lab for lab, _ in sorted(sizes.items(), key=lambda kv: (-kv[1], kv[0]))]
    remap = {lab: i for i, lab in enumerate(ranked)}
    return {n: remap[label[n]] for n in nodes}
|
||||
Reference in New Issue
Block a user