From 9a126f7c3628290daac9e706d37ad8dc06f90871 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 7 Jun 2026 21:21:53 +0000 Subject: [PATCH] feat(graph): research-gap (ghost) nodes (corpus graph PR C) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns the graph into a gap-finder: the 247 unresolved internal citations (a corpus precedent cites a ruling NOT in the corpus) collapse to 230 distinct "gap" nodes — each sized by how many corpus precedents cite it, i.e. the most-wanted missing precedent. Backend (web/graph_api.py — read-only, G2): - "gap" added to VALID_NODE_TYPES (NOT default → off unless requested). - New _gap_nodes_and_edges(): gap: nodes from precedent_internal_citations WHERE cited_case_law_id IS NULL, sized by global in-degree; cites edges only from precedents present in the view (dangling-edge invariant holds). Best-effort enrichment from missing_precedents via exact normalized-citation match → gap_status + missing_precedent_id. Validated: 230 gaps, top ע"א 3213/97 (cited 5×), 230/230 matched to missing_precedents. - GraphNode += gap_status, missing_precedent_id. Metrics correctly exclude gap edges (target not a precedent). No app.py change (gated via node_types). Frontend: - graph.ts: GraphNodeType += "gap"; node fields. - graph-filter-panel: toggle "חוסרי מחקר (פסיקה חסרה)" (off by default). - graph-canvas: gaps render as faint hollow dashed circles, never recoloured by color-by; sized by citation count. - graph-node-panel: gap branch — "מצוטטת ע״י N פסיקות" + status badge + link to /missing-precedents. web-ui build + lint pass. Invariants: G2 (SELECT-only), UI2 (model grows on explicit Pydantic). api:types post-deploy. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- web-ui/src/components/graph/graph-canvas.tsx | 17 +++- .../components/graph/graph-filter-panel.tsx | 6 ++ .../src/components/graph/graph-node-panel.tsx | 34 +++++++- web-ui/src/components/graph/graph-view.tsx | 9 +- web-ui/src/lib/api/graph.ts | 9 +- web/graph_api.py | 83 ++++++++++++++++++- 6 files changed, 151 insertions(+), 7 deletions(-) diff --git a/web-ui/src/components/graph/graph-canvas.tsx b/web-ui/src/components/graph/graph-canvas.tsx index 742e100..5a13b50 100644 --- a/web-ui/src/components/graph/graph-canvas.tsx +++ b/web-ui/src/components/graph/graph-canvas.tsx @@ -50,6 +50,7 @@ const NODE_COLORS: Record = { halacha: "#b45309", // amber topic: "#a97d3a", // gold — hubs stand out practice_area: "#475569", // slate + gap: "#94a3b8", // faint slate — research gap (not in corpus) }; const TREATMENT_COLORS: Record = { @@ -215,13 +216,23 @@ export function GraphCanvas({ (node: FGNode, ctx: CanvasRenderingContext2D, globalScale: number) => { const r = radiusForNode(node, sizeBy); const dimmed = isDimmed(node.id); + const isGap = node.type === "gap"; const color = colorForNode(node, colorBy); - ctx.globalAlpha = dimmed ? 0.18 : 1; + ctx.globalAlpha = dimmed ? 0.18 : isGap ? 0.55 : 1; ctx.beginPath(); ctx.arc(node.x ?? 0, node.y ?? 0, r, 0, 2 * Math.PI); - ctx.fillStyle = color; - ctx.fill(); + if (isGap) { + // Hollow dashed circle — a ruling cited but absent from the corpus. 
+ ctx.setLineDash([3 / globalScale, 2 / globalScale]); + ctx.lineWidth = 1.3 / globalScale; + ctx.strokeStyle = NODE_COLORS.gap; + ctx.stroke(); + ctx.setLineDash([]); + } else { + ctx.fillStyle = color; + ctx.fill(); + } if (node.id === activeId) { ctx.lineWidth = 2 / globalScale; ctx.strokeStyle = "#a97d3a"; diff --git a/web-ui/src/components/graph/graph-filter-panel.tsx b/web-ui/src/components/graph/graph-filter-panel.tsx index 778830c..c48489d 100644 --- a/web-ui/src/components/graph/graph-filter-panel.tsx +++ b/web-ui/src/components/graph/graph-filter-panel.tsx @@ -43,6 +43,7 @@ export type GraphControls = { showTopics: boolean; showPracticeAreas: boolean; showHalachot: boolean; + showGaps: boolean; }; const ALL = "__all__"; @@ -259,6 +260,11 @@ export function GraphFilterPanel({ checked={controls.showPracticeAreas} onCheckedChange={(v) => onChange({ showPracticeAreas: v })} /> + onChange({ showGaps: v })} + /> = { halacha: "הלכה", topic: "נושא", practice_area: "תחום", + gap: "פסיקה חסרה", +}; + +const GAP_STATUS_LABELS: Record = { + open: "ממתינה לקליטה", + uploaded: "הועלתה", + closed: "טופלה", + irrelevant: "לא רלוונטית", }; const PA_LABELS: Record = { @@ -43,6 +51,7 @@ export function GraphNodePanel({ onClose: () => void; }) { const isPrecedentLike = node.type === "precedent" || node.type === "halacha"; + const isGap = node.type === "gap"; return ( @@ -75,7 +84,21 @@ export function GraphNodePanel({ )} {node.precedent_level && } - {!isPrecedentLike && ( + {isGap && ( + <> + + {node.gap_status && ( + + )} +

+ פסיקה זו מצוטטת בקורפוס אך אינה קיימת בו — מועמדת לקליטה. +

+ + )} + {!isPrecedentLike && !isGap && (

לחיצה על נקודה זו מתמקדת בשכניה — כל הפסיקות המשויכות אליה.

@@ -90,6 +113,15 @@ export function GraphNodePanel({ )} + + {isGap && ( + + )}
); diff --git a/web-ui/src/components/graph/graph-view.tsx b/web-ui/src/components/graph/graph-view.tsx index 781692b..14706cd 100644 --- a/web-ui/src/components/graph/graph-view.tsx +++ b/web-ui/src/components/graph/graph-view.tsx @@ -65,6 +65,7 @@ export function GraphView() { showTopics: true, showPracticeAreas: true, showHalachot: false, + showGaps: false, }); const facets = useGraphFacets().data; const [selectedNode, setSelectedNode] = useState(null); @@ -78,8 +79,14 @@ export function GraphView() { if (controls.showTopics) t.push("topic"); if (controls.showPracticeAreas) t.push("practice_area"); if (controls.showHalachot) t.push("halacha"); + if (controls.showGaps) t.push("gap"); return t.join(","); - }, [controls.showTopics, controls.showPracticeAreas, controls.showHalachot]); + }, [ + controls.showTopics, + controls.showPracticeAreas, + controls.showHalachot, + controls.showGaps, + ]); // Metrics are needed when colouring by cluster or sizing by a centrality. const metricsOn = diff --git a/web-ui/src/lib/api/graph.ts b/web-ui/src/lib/api/graph.ts index 4a95060..ed9a4ac 100644 --- a/web-ui/src/lib/api/graph.ts +++ b/web-ui/src/lib/api/graph.ts @@ -14,7 +14,12 @@ import { keepPreviousData, useQuery } from "@tanstack/react-query"; import { apiRequest } from "./client"; -export type GraphNodeType = "precedent" | "halacha" | "topic" | "practice_area"; +export type GraphNodeType = + | "precedent" + | "halacha" + | "topic" + | "practice_area" + | "gap"; export type GraphEdgeType = | "cites" @@ -38,6 +43,8 @@ export type GraphNode = { pagerank: number | null; // normalized 0–1, only when metrics requested betweenness: number | null; // normalized 0–1 community: number | null; // dense cluster id, 0 = largest + gap_status: string | null; // gap nodes only — open|uploaded|closed|irrelevant + missing_precedent_id: string | null; // gap nodes only }; export type GraphFacets = { diff --git a/web/graph_api.py b/web/graph_api.py index 99ca117..21d627d 100644 --- 
a/web/graph_api.py +++ b/web/graph_api.py @@ -38,7 +38,7 @@ from pydantic import BaseModel from web import graph_metrics # ── Node-type vocabulary ───────────────────────────────────────────── -VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area"} +VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap"} DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area") NODE_CAP_DEFAULT = 400 NODE_CAP_MAX = 1500 @@ -69,6 +69,9 @@ class GraphNode(BaseModel): pagerank: float | None = None # normalized 0–1 (global influence) betweenness: float | None = None # normalized 0–1 (bridge-ness) community: int | None = None # dense cluster id, 0 = largest + # Gap nodes only — research-gap status from missing_precedents (best-effort). + gap_status: str | None = None # open | uploaded | closed | irrelevant + missing_precedent_id: str | None = None class GraphFacets(BaseModel): @@ -233,6 +236,76 @@ async def _edges_and_hubs( return hub_nodes, edges +_NORM_NUM = "regexp_replace(btrim(cited_case_number), '\\s+', ' ', 'g')" + + +async def _gap_nodes_and_edges( + conn: asyncpg.Connection, + prec_ids: list, +) -> tuple[list[GraphNode], list[GraphEdge]]: + """Research-gap ("ghost") nodes: precedents that are CITED but not in the + corpus (``precedent_internal_citations.cited_case_law_id IS NULL``). + + One ``gap:`` node per distinct cited number, sized by + how many corpus precedents cite it (global — the "most-wanted missing + precedent"). Edges only from citing precedents present in ``prec_ids`` so no + edge dangles. Best-effort enriched with ``missing_precedents`` status via an + exact normalized-citation match (an unmatched gap still renders).""" + nodes: list[GraphNode] = [] + edges: list[GraphEdge] = [] + if not prec_ids: + return nodes, edges + + # One edge per (displayed precedent, normalized cited number) pair. 
+ edge_rows = await conn.fetch( + f""" + SELECT DISTINCT {_NORM_NUM} AS num, source_case_law_id AS s + FROM precedent_internal_citations + WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> '' + AND source_case_law_id = ANY($1::uuid[]) + """, + prec_ids, + ) + if not edge_rows: + return nodes, edges + nums = {r["num"] for r in edge_rows} + + # Global in-degree per number = distinct citing precedents, independent of the cap. + indeg_rows = await conn.fetch( + f""" + SELECT {_NORM_NUM} AS num, COUNT(DISTINCT source_case_law_id) AS n + FROM precedent_internal_citations + WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> '' + GROUP BY 1 + """ + ) + indeg = {r["num"]: int(r["n"]) for r in indeg_rows} + + # Best-effort enrichment from missing_precedents (exact normalized match). + mp_rows = await conn.fetch( + "SELECT id, regexp_replace(btrim(citation), '\\s+', ' ', 'g') AS num, status " + "FROM missing_precedents" + ) + mp = {r["num"]: (str(r["id"]), r["status"]) for r in mp_rows if r["num"]} + + for num in sorted(nums): + gid = f"gap:{num}" + match = mp.get(num) + nodes.append( + GraphNode( + id=gid, + type="gap", + label=num, + size=indeg.get(num, 1), + gap_status=(match[1] if match else None), + missing_precedent_id=(match[0] if match else None), + ) + ) + for r in edge_rows: + edges.append(GraphEdge(source=f"cl:{r['s']}", target=f"gap:{r['num']}", type="cites")) + return nodes, edges + + # ── Endpoints' core logic ──────────────────────────────────────────── async def build_corpus_graph( pool: asyncpg.Pool, @@ -308,6 +381,10 @@ async def build_corpus_graph( nodes = [_precedent_node(r) for r in prec_rows] hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types) nodes.extend(hub_nodes) + if "gap" in types: + gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows]) + nodes.extend(gap_nodes) + edges.extend(gap_edges) if metrics: _stamp_metrics(nodes, edges) @@ -441,6 +518,10 @@ async def build_node_neighborhood( nodes = [_precedent_node(r) for r in 
prec_rows] hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types) nodes.extend(hub_nodes) + if "gap" in forced_types: + gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows]) + nodes.extend(gap_nodes) + edges.extend(gap_edges) return CorpusGraph( nodes=nodes,