feat(graph): research-gap (ghost) nodes (corpus graph PR C) #131

Merged
chaim merged 1 commits from worktree-graph-gaps into main 2026-06-07 21:22:19 +00:00
6 changed files with 151 additions and 7 deletions

View File

@@ -50,6 +50,7 @@ const NODE_COLORS: Record<string, string> = {
halacha: "#b45309", // amber
topic: "#a97d3a", // gold — hubs stand out
practice_area: "#475569", // slate
gap: "#94a3b8", // faint slate — research gap (not in corpus)
};
const TREATMENT_COLORS: Record<string, string> = {
@@ -215,13 +216,23 @@ export function GraphCanvas({
(node: FGNode, ctx: CanvasRenderingContext2D, globalScale: number) => {
const r = radiusForNode(node, sizeBy);
const dimmed = isDimmed(node.id);
const isGap = node.type === "gap";
const color = colorForNode(node, colorBy);
ctx.globalAlpha = dimmed ? 0.18 : 1;
ctx.globalAlpha = dimmed ? 0.18 : isGap ? 0.55 : 1;
ctx.beginPath();
ctx.arc(node.x ?? 0, node.y ?? 0, r, 0, 2 * Math.PI);
ctx.fillStyle = color;
ctx.fill();
if (isGap) {
// Hollow dashed circle — a ruling cited but absent from the corpus.
ctx.setLineDash([3 / globalScale, 2 / globalScale]);
ctx.lineWidth = 1.3 / globalScale;
ctx.strokeStyle = NODE_COLORS.gap;
ctx.stroke();
ctx.setLineDash([]);
} else {
ctx.fillStyle = color;
ctx.fill();
}
if (node.id === activeId) {
ctx.lineWidth = 2 / globalScale;
ctx.strokeStyle = "#a97d3a";

View File

@@ -43,6 +43,7 @@ export type GraphControls = {
showTopics: boolean;
showPracticeAreas: boolean;
showHalachot: boolean;
showGaps: boolean;
};
const ALL = "__all__";
@@ -259,6 +260,11 @@ export function GraphFilterPanel({
checked={controls.showPracticeAreas}
onCheckedChange={(v) => onChange({ showPracticeAreas: v })}
/>
<ToggleRow
label="חוסרי מחקר (פסיקה חסרה)"
checked={controls.showGaps}
onCheckedChange={(v) => onChange({ showGaps: v })}
/>
<ToggleRow
label="הלכות (שלב ב׳)"
checked={controls.showHalachot}

View File

@@ -19,6 +19,14 @@ const TYPE_LABELS: Record<string, string> = {
halacha: "הלכה",
topic: "נושא",
practice_area: "תחום",
gap: "פסיקה חסרה",
};
// Hebrew display labels for a gap node's `gap_status` value. The node panel
// looks a status up here and falls back to the raw status string when the
// value has no entry.
const GAP_STATUS_LABELS: Record<string, string> = {
open: "ממתינה לקליטה", // awaiting ingestion
uploaded: "הועלתה", // uploaded
closed: "טופלה", // handled
irrelevant: "לא רלוונטית", // not relevant
};
const PA_LABELS: Record<string, string> = {
@@ -43,6 +51,7 @@ export function GraphNodePanel({
onClose: () => void;
}) {
const isPrecedentLike = node.type === "precedent" || node.type === "halacha";
const isGap = node.type === "gap";
return (
<Card className="bg-surface border-rule shadow-sm w-80 shrink-0 overflow-y-auto">
<CardContent className="space-y-4 p-4">
@@ -75,7 +84,21 @@ export function GraphNodePanel({
<Row label="מקור" value={SOURCE_LABELS[node.source_kind] ?? node.source_kind} />
)}
{node.precedent_level && <Row label="דרגה" value={node.precedent_level} />}
{!isPrecedentLike && (
{isGap && (
<>
<Row label="מצוטטת ע״י" value={`${node.size} פסיקות בקורפוס`} />
{node.gap_status && (
<Row
label="סטטוס"
value={GAP_STATUS_LABELS[node.gap_status] ?? node.gap_status}
/>
)}
<p className="text-ink-muted text-xs leading-relaxed m-0">
פסיקה זו מצוטטת בקורפוס אך אינה קיימת בו מועמדת לקליטה.
</p>
</>
)}
{!isPrecedentLike && !isGap && (
<p className="text-ink-muted text-xs leading-relaxed m-0">
לחיצה על נקודה זו מתמקדת בשכניה כל הפסיקות המשויכות אליה.
</p>
@@ -90,6 +113,15 @@ export function GraphNodePanel({
</Link>
</Button>
)}
{isGap && (
<Button asChild variant="outline" className="w-full">
<Link href="/missing-precedents">
<ExternalLink className="size-4 me-2" />
לרשימת הפסיקה החסרה
</Link>
</Button>
)}
</CardContent>
</Card>
);

View File

@@ -65,6 +65,7 @@ export function GraphView() {
showTopics: true,
showPracticeAreas: true,
showHalachot: false,
showGaps: false,
});
const facets = useGraphFacets().data;
const [selectedNode, setSelectedNode] = useState<GraphNode | null>(null);
@@ -78,8 +79,14 @@ export function GraphView() {
if (controls.showTopics) t.push("topic");
if (controls.showPracticeAreas) t.push("practice_area");
if (controls.showHalachot) t.push("halacha");
if (controls.showGaps) t.push("gap");
return t.join(",");
}, [controls.showTopics, controls.showPracticeAreas, controls.showHalachot]);
}, [
controls.showTopics,
controls.showPracticeAreas,
controls.showHalachot,
controls.showGaps,
]);
// Metrics are needed when colouring by cluster or sizing by a centrality.
const metricsOn =

View File

@@ -14,7 +14,12 @@
import { keepPreviousData, useQuery } from "@tanstack/react-query";
import { apiRequest } from "./client";
export type GraphNodeType = "precedent" | "halacha" | "topic" | "practice_area";
/**
 * Node kinds the corpus graph can contain. `"gap"` marks a research-gap
 * node: a ruling that is cited but is not itself in the corpus.
 */
export type GraphNodeType =
| "precedent"
| "halacha"
| "topic"
| "practice_area"
| "gap";
export type GraphEdgeType =
| "cites"
@@ -38,6 +43,8 @@ export type GraphNode = {
pagerank: number | null; // normalized 0–1, only when metrics requested
betweenness: number | null; // normalized 0–1
community: number | null; // dense cluster id, 0 = largest
gap_status: string | null; // gap nodes only — open|uploaded|closed|irrelevant
missing_precedent_id: string | null; // gap nodes only
};
export type GraphFacets = {

View File

@@ -38,7 +38,7 @@ from pydantic import BaseModel
from web import graph_metrics
# ── Node-type vocabulary ─────────────────────────────────────────────
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area"}
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap"}
DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
NODE_CAP_DEFAULT = 400
NODE_CAP_MAX = 1500
@@ -69,6 +69,9 @@ class GraphNode(BaseModel):
pagerank: float | None = None # normalized 0–1 (global influence)
betweenness: float | None = None # normalized 0–1 (bridge-ness)
community: int | None = None # dense cluster id, 0 = largest
# Gap nodes only — research-gap status from missing_precedents (best-effort).
gap_status: str | None = None # open | uploaded | closed | irrelevant
missing_precedent_id: str | None = None
class GraphFacets(BaseModel):
@@ -233,6 +236,76 @@ async def _edges_and_hubs(
return hub_nodes, edges
_NORM_NUM = "regexp_replace(btrim(cited_case_number), '\\s+', ' ', 'g')"
async def _gap_nodes_and_edges(
    conn: asyncpg.Connection,
    prec_ids: list,
) -> tuple[list[GraphNode], list[GraphEdge]]:
    """Research-gap ("ghost") nodes: precedents that are CITED but not in the
    corpus (``precedent_internal_citations.cited_case_law_id IS NULL``).

    One ``gap:<normalized citation>`` node per distinct cited number, sized by
    how many distinct corpus precedents cite it (global — the "most-wanted
    missing precedent"). Edges only from citing precedents present in
    ``prec_ids`` so no edge dangles, and at most one edge per
    (citing precedent, cited number) pair. Best-effort enriched with
    ``missing_precedents`` status via an exact normalized-citation match (an
    unmatched gap still renders).

    Args:
        conn: Open database connection used for all three queries.
        prec_ids: Ids of the precedents currently displayed; only citations
            whose source is in this list produce gap nodes and edges.

    Returns:
        ``(nodes, edges)`` — both empty when ``prec_ids`` is empty or none of
        the displayed precedents cites a ruling missing from the corpus.
    """
    nodes: list[GraphNode] = []
    edges: list[GraphEdge] = []
    if not prec_ids:
        return nodes, edges

    # Edges from the displayed precedents to the numbers they cite. DISTINCT
    # collapses repeated citation rows for the same (source, number) pair so
    # the graph never receives duplicate parallel edges.
    edge_rows = await conn.fetch(
        f"""
        SELECT DISTINCT {_NORM_NUM} AS num, source_case_law_id AS s
        FROM precedent_internal_citations
        WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> ''
          AND source_case_law_id = ANY($1::uuid[])
        """,
        prec_ids,
    )
    if not edge_rows:
        return nodes, edges
    nums = sorted({r["num"] for r in edge_rows})

    # Global in-degree per number (importance), independent of the cap: the
    # count spans every citing precedent, not just the displayed ones. It
    # counts distinct citing precedents rather than citation rows, and only
    # for the numbers that will actually be rendered.
    indeg_rows = await conn.fetch(
        f"""
        SELECT {_NORM_NUM} AS num, COUNT(DISTINCT source_case_law_id) AS n
        FROM precedent_internal_citations
        WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> ''
          AND {_NORM_NUM} = ANY($1::text[])
        GROUP BY 1
        """,
        nums,
    )
    indeg = {r["num"]: int(r["n"]) for r in indeg_rows}

    # Best-effort enrichment from missing_precedents (exact normalized match).
    mp_rows = await conn.fetch(
        "SELECT id, regexp_replace(btrim(citation), '\\s+', ' ', 'g') AS num, status "
        "FROM missing_precedents"
    )
    mp = {r["num"]: (str(r["id"]), r["status"]) for r in mp_rows if r["num"]}

    for num in nums:
        match = mp.get(num)
        nodes.append(
            GraphNode(
                id=f"gap:{num}",
                type="gap",
                label=num,
                # Fall back to 1: a number reached via an edge row is cited
                # by at least one precedent.
                size=indeg.get(num, 1),
                gap_status=(match[1] if match else None),
                missing_precedent_id=(match[0] if match else None),
            )
        )
    edges.extend(
        GraphEdge(source=f"cl:{r['s']}", target=f"gap:{r['num']}", type="cites")
        for r in edge_rows
    )
    return nodes, edges
# ── Endpoints' core logic ────────────────────────────────────────────
async def build_corpus_graph(
pool: asyncpg.Pool,
@@ -308,6 +381,10 @@ async def build_corpus_graph(
nodes = [_precedent_node(r) for r in prec_rows]
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
nodes.extend(hub_nodes)
if "gap" in types:
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
nodes.extend(gap_nodes)
edges.extend(gap_edges)
if metrics:
_stamp_metrics(nodes, edges)
@@ -441,6 +518,10 @@ async def build_node_neighborhood(
nodes = [_precedent_node(r) for r in prec_rows]
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types)
nodes.extend(hub_nodes)
if "gap" in forced_types:
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
nodes.extend(gap_nodes)
edges.extend(gap_edges)
return CorpusGraph(
nodes=nodes,