feat(graph): research-gap (ghost) nodes (corpus graph PR C)
Turns the graph into a gap-finder: the 247 unresolved internal citations (a corpus precedent cites a ruling NOT in the corpus) collapse to 230 distinct "gap" nodes — each sized by how many corpus precedents cite it, i.e. the most-wanted missing precedent. Backend (web/graph_api.py — read-only, G2): - "gap" added to VALID_NODE_TYPES (NOT default → off unless requested). - New _gap_nodes_and_edges(): gap:<normalized citation> nodes from precedent_internal_citations WHERE cited_case_law_id IS NULL, sized by global in-degree; cites edges only from precedents present in the view (dangling-edge invariant holds). Best-effort enrichment from missing_precedents via exact normalized-citation match → gap_status + missing_precedent_id. Validated: 230 gaps, top ע"א 3213/97 (cited 5×), 230/230 matched to missing_precedents. - GraphNode += gap_status, missing_precedent_id. Metrics correctly exclude gap edges (target not a precedent). No app.py change (gated via node_types). Frontend: - graph.ts: GraphNodeType += "gap"; node fields. - graph-filter-panel: toggle "חוסרי מחקר (פסיקה חסרה)" (off by default). - graph-canvas: gaps render as faint hollow dashed circles, never recoloured by color-by; sized by citation count. - graph-node-panel: gap branch — "מצוטטת ע״י N פסיקות" + status badge + link to /missing-precedents. web-ui build + lint pass. Invariants: G2 (SELECT-only), UI2 (model grows on explicit Pydantic). api:types post-deploy. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,7 +38,7 @@ from pydantic import BaseModel
|
||||
from web import graph_metrics
|
||||
|
||||
# ── Node-type vocabulary ─────────────────────────────────────────────
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area"}
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap"}
|
||||
DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
|
||||
NODE_CAP_DEFAULT = 400
|
||||
NODE_CAP_MAX = 1500
|
||||
@@ -69,6 +69,9 @@ class GraphNode(BaseModel):
|
||||
pagerank: float | None = None # normalized 0–1 (global influence)
|
||||
betweenness: float | None = None # normalized 0–1 (bridge-ness)
|
||||
community: int | None = None # dense cluster id, 0 = largest
|
||||
# Gap nodes only — research-gap status from missing_precedents (best-effort).
|
||||
gap_status: str | None = None # open | uploaded | closed | irrelevant
|
||||
missing_precedent_id: str | None = None
|
||||
|
||||
|
||||
class GraphFacets(BaseModel):
|
||||
@@ -233,6 +236,76 @@ async def _edges_and_hubs(
|
||||
return hub_nodes, edges
|
||||
|
||||
|
||||
# SQL expression that normalizes a cited case number: trim both ends, then
# collapse every whitespace run to a single space. Interpolated into the
# gap queries so the SELECT and GROUP BY normalize identically.
_NORM_NUM = r"regexp_replace(btrim(cited_case_number), '\s+', ' ', 'g')"
|
||||
|
||||
|
||||
async def _gap_nodes_and_edges(
    conn: asyncpg.Connection,
    prec_ids: list,
) -> tuple[list[GraphNode], list[GraphEdge]]:
    """Build research-gap ("ghost") nodes and the edges that point at them.

    A gap is a case number that is CITED by a corpus precedent but is not in
    the corpus itself (``precedent_internal_citations.cited_case_law_id IS
    NULL``).

    One ``gap:<normalized citation>`` node is emitted per distinct normalized
    cited number reachable from ``prec_ids``. Each node is sized by the number
    of DISTINCT citing precedents across the whole table (global, not limited
    to ``prec_ids``) — the "most-wanted missing precedent". Edges are emitted
    only from citing precedents listed in ``prec_ids`` so no edge dangles, and
    each (source, gap) pair yields exactly one edge even when the same
    precedent cites the same number in several rows or spellings.

    Nodes are enriched best-effort with ``missing_precedents`` status via an
    exact normalized-citation match; an unmatched gap still renders, with
    ``gap_status`` and ``missing_precedent_id`` left as ``None``.

    Args:
        conn: Open database connection used for the three SELECTs.
        prec_ids: Ids of the precedents present in the current view; bound as
            a ``uuid[]`` parameter.

    Returns:
        ``(nodes, edges)``; both lists are empty when ``prec_ids`` is empty or
        none of those precedents has an unresolved citation.
    """
    nodes: list[GraphNode] = []
    edges: list[GraphEdge] = []
    if not prec_ids:
        return nodes, edges

    # Edges from the displayed precedents to the numbers they cite.
    # DISTINCT: whitespace normalization can fold several raw rows of one
    # precedent onto the same number, which must not yield parallel edges.
    # ORDER BY keeps the edge list stable between calls.
    edge_rows = await conn.fetch(
        f"""
        SELECT DISTINCT {_NORM_NUM} AS num, source_case_law_id AS s
        FROM precedent_internal_citations
        WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> ''
          AND source_case_law_id = ANY($1::uuid[])
        ORDER BY 1, 2
        """,
        prec_ids,
    )
    if not edge_rows:
        return nodes, edges
    nums = {r["num"] for r in edge_rows}

    # Global in-degree per number (importance), independent of the cap.
    # Counts distinct citing precedents rather than citation rows, so a
    # precedent that cites the same number twice contributes once.
    indeg_rows = await conn.fetch(
        f"""
        SELECT {_NORM_NUM} AS num, COUNT(DISTINCT source_case_law_id) AS n
        FROM precedent_internal_citations
        WHERE cited_case_law_id IS NULL AND btrim(cited_case_number) <> ''
        GROUP BY 1
        """
    )
    indeg = {r["num"]: int(r["n"]) for r in indeg_rows}

    # Best-effort enrichment from missing_precedents (exact normalized match).
    mp_rows = await conn.fetch(
        "SELECT id, regexp_replace(btrim(citation), '\\s+', ' ', 'g') AS num, status "
        "FROM missing_precedents"
    )
    mp = {r["num"]: (str(r["id"]), r["status"]) for r in mp_rows if r["num"]}

    for num in sorted(nums):
        match = mp.get(num)
        nodes.append(
            GraphNode(
                id=f"gap:{num}",
                type="gap",
                label=num,
                # Fallback of 1: every gap here has at least one citing row.
                size=indeg.get(num, 1),
                gap_status=(match[1] if match else None),
                missing_precedent_id=(match[0] if match else None),
            )
        )
    edges.extend(
        GraphEdge(source=f"cl:{r['s']}", target=f"gap:{r['num']}", type="cites")
        for r in edge_rows
    )
    return nodes, edges
|
||||
|
||||
|
||||
# ── Endpoints' core logic ────────────────────────────────────────────
|
||||
async def build_corpus_graph(
|
||||
pool: asyncpg.Pool,
|
||||
@@ -308,6 +381,10 @@ async def build_corpus_graph(
|
||||
nodes = [_precedent_node(r) for r in prec_rows]
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
|
||||
nodes.extend(hub_nodes)
|
||||
if "gap" in types:
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
|
||||
nodes.extend(gap_nodes)
|
||||
edges.extend(gap_edges)
|
||||
|
||||
if metrics:
|
||||
_stamp_metrics(nodes, edges)
|
||||
@@ -441,6 +518,10 @@ async def build_node_neighborhood(
|
||||
nodes = [_precedent_node(r) for r in prec_rows]
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types)
|
||||
nodes.extend(hub_nodes)
|
||||
if "gap" in forced_types:
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
|
||||
nodes.extend(gap_nodes)
|
||||
edges.extend(gap_edges)
|
||||
|
||||
return CorpusGraph(
|
||||
nodes=nodes,
|
||||
|
||||
Reference in New Issue
Block a user