feat(graph): research-gap (ghost) nodes (corpus graph PR C) #131
@@ -50,6 +50,7 @@ const NODE_COLORS: Record<string, string> = {
|
||||
halacha: "#b45309", // amber
|
||||
topic: "#a97d3a", // gold — hubs stand out
|
||||
practice_area: "#475569", // slate
|
||||
gap: "#94a3b8", // faint slate — research gap (not in corpus)
|
||||
};
|
||||
|
||||
const TREATMENT_COLORS: Record<string, string> = {
|
||||
@@ -215,13 +216,23 @@ export function GraphCanvas({
|
||||
(node: FGNode, ctx: CanvasRenderingContext2D, globalScale: number) => {
|
||||
const r = radiusForNode(node, sizeBy);
|
||||
const dimmed = isDimmed(node.id);
|
||||
const isGap = node.type === "gap";
|
||||
const color = colorForNode(node, colorBy);
|
||||
ctx.globalAlpha = dimmed ? 0.18 : 1;
|
||||
ctx.globalAlpha = dimmed ? 0.18 : isGap ? 0.55 : 1;
|
||||
|
||||
ctx.beginPath();
|
||||
ctx.arc(node.x ?? 0, node.y ?? 0, r, 0, 2 * Math.PI);
|
||||
ctx.fillStyle = color;
|
||||
ctx.fill();
|
||||
if (isGap) {
|
||||
// Hollow dashed circle — a ruling cited but absent from the corpus.
|
||||
ctx.setLineDash([3 / globalScale, 2 / globalScale]);
|
||||
ctx.lineWidth = 1.3 / globalScale;
|
||||
ctx.strokeStyle = NODE_COLORS.gap;
|
||||
ctx.stroke();
|
||||
ctx.setLineDash([]);
|
||||
} else {
|
||||
ctx.fillStyle = color;
|
||||
ctx.fill();
|
||||
}
|
||||
if (node.id === activeId) {
|
||||
ctx.lineWidth = 2 / globalScale;
|
||||
ctx.strokeStyle = "#a97d3a";
|
||||
|
||||
@@ -43,6 +43,7 @@ export type GraphControls = {
|
||||
showTopics: boolean;
|
||||
showPracticeAreas: boolean;
|
||||
showHalachot: boolean;
|
||||
showGaps: boolean;
|
||||
};
|
||||
|
||||
const ALL = "__all__";
|
||||
@@ -259,6 +260,11 @@ export function GraphFilterPanel({
|
||||
checked={controls.showPracticeAreas}
|
||||
onCheckedChange={(v) => onChange({ showPracticeAreas: v })}
|
||||
/>
|
||||
<ToggleRow
|
||||
label="חוסרי מחקר (פסיקה חסרה)"
|
||||
checked={controls.showGaps}
|
||||
onCheckedChange={(v) => onChange({ showGaps: v })}
|
||||
/>
|
||||
<ToggleRow
|
||||
label="הלכות (שלב ב׳)"
|
||||
checked={controls.showHalachot}
|
||||
|
||||
@@ -19,6 +19,14 @@ const TYPE_LABELS: Record<string, string> = {
|
||||
halacha: "הלכה",
|
||||
topic: "נושא",
|
||||
practice_area: "תחום",
|
||||
gap: "פסיקה חסרה",
|
||||
};
|
||||
|
||||
// Hebrew display labels for a gap node's status, keyed by the raw status value
// (open | uploaded | closed | irrelevant). Lookups fall back to the raw status
// string when a value has no entry here.
const GAP_STATUS_LABELS: Record<string, string> = {
|
||||
open: "ממתינה לקליטה",
|
||||
uploaded: "הועלתה",
|
||||
closed: "טופלה",
|
||||
irrelevant: "לא רלוונטית",
|
||||
};
|
||||
|
||||
const PA_LABELS: Record<string, string> = {
|
||||
@@ -43,6 +51,7 @@ export function GraphNodePanel({
|
||||
onClose: () => void;
|
||||
}) {
|
||||
const isPrecedentLike = node.type === "precedent" || node.type === "halacha";
|
||||
const isGap = node.type === "gap";
|
||||
return (
|
||||
<Card className="bg-surface border-rule shadow-sm w-80 shrink-0 overflow-y-auto">
|
||||
<CardContent className="space-y-4 p-4">
|
||||
@@ -75,7 +84,21 @@ export function GraphNodePanel({
|
||||
<Row label="מקור" value={SOURCE_LABELS[node.source_kind] ?? node.source_kind} />
|
||||
)}
|
||||
{node.precedent_level && <Row label="דרגה" value={node.precedent_level} />}
|
||||
{!isPrecedentLike && (
|
||||
{isGap && (
|
||||
<>
|
||||
<Row label="מצוטטת ע״י" value={`${node.size} פסיקות בקורפוס`} />
|
||||
{node.gap_status && (
|
||||
<Row
|
||||
label="סטטוס"
|
||||
value={GAP_STATUS_LABELS[node.gap_status] ?? node.gap_status}
|
||||
/>
|
||||
)}
|
||||
<p className="text-ink-muted text-xs leading-relaxed m-0">
|
||||
פסיקה זו מצוטטת בקורפוס אך אינה קיימת בו — מועמדת לקליטה.
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
{!isPrecedentLike && !isGap && (
|
||||
<p className="text-ink-muted text-xs leading-relaxed m-0">
|
||||
לחיצה על נקודה זו מתמקדת בשכניה — כל הפסיקות המשויכות אליה.
|
||||
</p>
|
||||
@@ -90,6 +113,15 @@ export function GraphNodePanel({
|
||||
</Link>
|
||||
</Button>
|
||||
)}
|
||||
|
||||
{isGap && (
|
||||
<Button asChild variant="outline" className="w-full">
|
||||
<Link href="/missing-precedents">
|
||||
<ExternalLink className="size-4 me-2" />
|
||||
לרשימת הפסיקה החסרה
|
||||
</Link>
|
||||
</Button>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
|
||||
@@ -65,6 +65,7 @@ export function GraphView() {
|
||||
showTopics: true,
|
||||
showPracticeAreas: true,
|
||||
showHalachot: false,
|
||||
showGaps: false,
|
||||
});
|
||||
const facets = useGraphFacets().data;
|
||||
const [selectedNode, setSelectedNode] = useState<GraphNode | null>(null);
|
||||
@@ -78,8 +79,14 @@ export function GraphView() {
|
||||
if (controls.showTopics) t.push("topic");
|
||||
if (controls.showPracticeAreas) t.push("practice_area");
|
||||
if (controls.showHalachot) t.push("halacha");
|
||||
if (controls.showGaps) t.push("gap");
|
||||
return t.join(",");
|
||||
}, [controls.showTopics, controls.showPracticeAreas, controls.showHalachot]);
|
||||
}, [
|
||||
controls.showTopics,
|
||||
controls.showPracticeAreas,
|
||||
controls.showHalachot,
|
||||
controls.showGaps,
|
||||
]);
|
||||
|
||||
// Metrics are needed when colouring by cluster or sizing by a centrality.
|
||||
const metricsOn =
|
||||
|
||||
@@ -14,7 +14,12 @@
|
||||
import { keepPreviousData, useQuery } from "@tanstack/react-query";
|
||||
import { apiRequest } from "./client";
|
||||
|
||||
export type GraphNodeType = "precedent" | "halacha" | "topic" | "practice_area";
|
||||
/**
 * Kinds of node that can appear in the corpus graph.
 * `gap` is a research-gap node: a ruling that is cited but not in the corpus.
 */
export type GraphNodeType =
|
||||
| "precedent"
|
||||
| "halacha"
|
||||
| "topic"
|
||||
| "practice_area"
|
||||
| "gap";
|
||||
|
||||
export type GraphEdgeType =
|
||||
| "cites"
|
||||
@@ -38,6 +43,8 @@ export type GraphNode = {
|
||||
pagerank: number | null; // normalized 0–1, only when metrics requested
|
||||
betweenness: number | null; // normalized 0–1
|
||||
community: number | null; // dense cluster id, 0 = largest
|
||||
gap_status: string | null; // gap nodes only — open|uploaded|closed|irrelevant
|
||||
missing_precedent_id: string | null; // gap nodes only
|
||||
};
|
||||
|
||||
export type GraphFacets = {
|
||||
|
||||
@@ -38,7 +38,7 @@ from pydantic import BaseModel
|
||||
from web import graph_metrics
|
||||
|
||||
# ── Node-type vocabulary ─────────────────────────────────────────────
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area"}
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap"}
|
||||
DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
|
||||
NODE_CAP_DEFAULT = 400
|
||||
NODE_CAP_MAX = 1500
|
||||
@@ -69,6 +69,9 @@ class GraphNode(BaseModel):
|
||||
pagerank: float | None = None # normalized 0–1 (global influence)
|
||||
betweenness: float | None = None # normalized 0–1 (bridge-ness)
|
||||
community: int | None = None # dense cluster id, 0 = largest
|
||||
# Gap nodes only — research-gap status from missing_precedents (best-effort).
|
||||
gap_status: str | None = None # open | uploaded | closed | irrelevant
|
||||
missing_precedent_id: str | None = None
|
||||
|
||||
|
||||
class GraphFacets(BaseModel):
|
||||
@@ -233,6 +236,76 @@ async def _edges_and_hubs(
|
||||
return hub_nodes, edges
|
||||
|
||||
|
||||
# SQL expression that normalizes ``cited_case_number``: trims it and collapses
# every run of whitespace to a single space. Interpolated into the gap queries.
_NORM_NUM = "regexp_replace(btrim(cited_case_number), '\\s+', ' ', 'g')"
|
||||
|
||||
|
||||
async def _gap_nodes_and_edges(
    conn: asyncpg.Connection,
    prec_ids: list,
) -> tuple[list[GraphNode], list[GraphEdge]]:
    """Build research-gap ("ghost") nodes and the edges that point at them.

    A gap is a precedent that is CITED but not in the corpus
    (``precedent_internal_citations.cited_case_law_id IS NULL``).

    One ``gap:<normalized citation>`` node is emitted per distinct cited number
    reachable from ``prec_ids``. Its ``size`` is the number of distinct corpus
    precedents citing that number (global, not limited to ``prec_ids`` — the
    "most-wanted missing precedent"). Edges are only created from citing
    precedents present in ``prec_ids`` so no edge dangles, and each
    (precedent, gap) pair yields exactly one edge even when the same number is
    cited several times by one precedent.

    Nodes are enriched, best-effort, with ``missing_precedents`` status via an
    exact normalized-citation match; an unmatched gap still renders with
    ``gap_status`` and ``missing_precedent_id`` left as ``None``.

    Args:
        conn: Open database connection used for the three lookups.
        prec_ids: Ids of the precedents currently displayed in the graph.

    Returns:
        ``(nodes, edges)``; both lists are empty when ``prec_ids`` is empty or
        none of those precedents cites a number missing from the corpus.
    """
    nodes: list[GraphNode] = []
    edges: list[GraphEdge] = []
    if not prec_ids:
        return nodes, edges

    # Edges from the displayed precedents to the numbers they cite. DISTINCT
    # collapses repeated citations of the same number by the same precedent so
    # the graph does not receive duplicate parallel edges.
    edge_rows = await conn.fetch(
        f"""
        SELECT DISTINCT {_NORM_NUM} AS num, source_case_law_id AS s
        FROM precedent_internal_citations
        WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> ''
          AND source_case_law_id = ANY($1::uuid[])
        """,
        prec_ids,
    )
    if not edge_rows:
        return nodes, edges
    # Sorted once: gives deterministic node order and doubles as the filter
    # list for the two lookups below.
    nums = sorted({r["num"] for r in edge_rows})

    # Global in-degree per number (importance), independent of the node cap.
    # Counts distinct citing precedents, and only for the numbers actually
    # needed, instead of aggregating the whole table.
    indeg_rows = await conn.fetch(
        f"""
        SELECT {_NORM_NUM} AS num, COUNT(DISTINCT source_case_law_id) AS n
        FROM precedent_internal_citations
        WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> ''
          AND {_NORM_NUM} = ANY($1::text[])
        GROUP BY 1
        """,
        nums,
    )
    indeg = {r["num"]: int(r["n"]) for r in indeg_rows}

    # Best-effort enrichment from missing_precedents (exact normalized match),
    # restricted to the numbers we are about to render.
    mp_rows = await conn.fetch(
        "SELECT id, regexp_replace(btrim(citation), '\\s+', ' ', 'g') AS num, status "
        "FROM missing_precedents "
        "WHERE regexp_replace(btrim(citation), '\\s+', ' ', 'g') = ANY($1::text[])",
        nums,
    )
    mp = {r["num"]: (str(r["id"]), r["status"]) for r in mp_rows if r["num"]}

    for num in nums:
        match = mp.get(num)
        nodes.append(
            GraphNode(
                id=f"gap:{num}",
                type="gap",
                label=num,
                # Fallback of 1: the number is cited at least once by definition.
                size=indeg.get(num, 1),
                gap_status=(match[1] if match else None),
                missing_precedent_id=(match[0] if match else None),
            )
        )
    for r in edge_rows:
        edges.append(GraphEdge(source=f"cl:{r['s']}", target=f"gap:{r['num']}", type="cites"))
    return nodes, edges
|
||||
|
||||
|
||||
# ── Endpoints' core logic ────────────────────────────────────────────
|
||||
async def build_corpus_graph(
|
||||
pool: asyncpg.Pool,
|
||||
@@ -308,6 +381,10 @@ async def build_corpus_graph(
|
||||
nodes = [_precedent_node(r) for r in prec_rows]
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
|
||||
nodes.extend(hub_nodes)
|
||||
if "gap" in types:
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
|
||||
nodes.extend(gap_nodes)
|
||||
edges.extend(gap_edges)
|
||||
|
||||
if metrics:
|
||||
_stamp_metrics(nodes, edges)
|
||||
@@ -441,6 +518,10 @@ async def build_node_neighborhood(
|
||||
nodes = [_precedent_node(r) for r in prec_rows]
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types)
|
||||
nodes.extend(hub_nodes)
|
||||
if "gap" in forced_types:
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
|
||||
nodes.extend(gap_nodes)
|
||||
edges.extend(gap_edges)
|
||||
|
||||
return CorpusGraph(
|
||||
nodes=nodes,
|
||||
|
||||
Reference in New Issue
Block a user