feat(graph): daily-digest (יומון) discovery layer (corpus graph PR E)
Chaim's idea: surface the downloaded "כל יום" digests in the graph. Each digest COVERS the ruling it analyses — a corpus precedent when we have it (16), or a synthesized gap node from its underlying_citation when we don't (269). So the digest layer doubles as a discovery signal: it makes visible that the daily feed overwhelmingly covers rulings NOT yet in the corpus. Backend (web/graph_api.py — read-only, G2): - "digest" added to VALID_NODE_TYPES (off by default). - _digest_nodes_and_edges(): dig:<id> nodes from completed digests, `covers` edge → cl:precedent (linked_case_law_id in view) or → gap:<underlying_citation> (synthesized, deduped against the gap layer — real in-degree wins). Carries concept_tag (label), headline_holding (note), underlying_court/date. - _add_digests() appends the layer with gap dedup. Wired into both build functions. GraphNode += note, digest_id. Gated via node_types (no app.py change). Validated: 16 covers→precedent, 269 covers→gap. Frontend: - graph.ts: GraphNodeType += "digest"; GraphEdgeType += "covers"; node fields. - graph-filter-panel: toggle "יומונים (כל יום)" (off by default). - graph-canvas: digest = teal node (r=4); `covers` edges teal. - graph-node-panel: digest branch — concept + holding + court/date + link to /digests. web-ui build + lint pass. Invariants: G2 (SELECT-only), UI2. api:types post-deploy. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
106
web/graph_api.py
106
web/graph_api.py
@@ -38,7 +38,7 @@ from pydantic import BaseModel
|
||||
from web import graph_metrics
|
||||
|
||||
# ── Node-type vocabulary ─────────────────────────────────────────────
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap"}
|
||||
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap", "digest"}
|
||||
DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
|
||||
NODE_CAP_DEFAULT = 400
|
||||
NODE_CAP_MAX = 1500
|
||||
@@ -72,6 +72,9 @@ class GraphNode(BaseModel):
|
||||
# Gap nodes only — research-gap status from missing_precedents (best-effort).
|
||||
gap_status: str | None = None # open | uploaded | closed | irrelevant
|
||||
missing_precedent_id: str | None = None
|
||||
# Digest nodes only — the holding line from the daily יומון.
|
||||
note: str | None = None
|
||||
digest_id: str | None = None # for deep-link to /digests
|
||||
|
||||
|
||||
class GraphFacets(BaseModel):
|
||||
@@ -306,6 +309,97 @@ async def _gap_nodes_and_edges(
|
||||
return nodes, edges
|
||||
|
||||
|
||||
async def _digest_nodes_and_edges(
    conn: asyncpg.Connection,
    prec_ids: list,
) -> tuple[list[GraphNode], list[GraphEdge], list[GraphNode]]:
    """Build the daily-digest ("yomon") discovery layer for the given view.

    Every completed digest ``covers`` the ruling it analyses. The target of
    that edge is either:

    * ``cl:<linked_case_law_id>`` when the digest is linked to a precedent
      that is part of ``prec_ids``; or
    * ``gap:<normalized underlying_citation>`` when the digest has no linked
      precedent but does carry a non-blank citation. A size-1 gap node is
      synthesized for each distinct normalized citation.

    Args:
        conn: Open database connection; only a single SELECT is issued.
        prec_ids: Ids of the precedents currently in the view. When empty,
            no query is run and three empty lists are returned.

    Returns:
        ``(digest_nodes, covers_edges, gap_target_nodes)``. The gap targets
        are NOT deduplicated against any other layer here; the caller is
        expected to drop those whose id already exists in the graph.
    """
    if not prec_ids:
        return [], [], []

    # String form of the in-view precedent ids, for membership tests below.
    in_view = {str(pid) for pid in prec_ids}

    records = await conn.fetch(
        """
        SELECT id, yomon_number, concept_tag, headline_holding,
               underlying_citation, underlying_court, underlying_date,
               digest_date, practice_area, linked_case_law_id,
               regexp_replace(btrim(underlying_citation), '\\s+', ' ', 'g') AS u_num
        FROM digests
        WHERE extraction_status = 'completed'
          AND (linked_case_law_id = ANY($1::uuid[])
               OR (linked_case_law_id IS NULL AND btrim(underlying_citation) <> ''))
        ORDER BY digest_date DESC NULLS LAST
        LIMIT 400
        """,
        prec_ids,
    )

    digests: list[GraphNode] = []
    covers: list[GraphEdge] = []
    gap_targets: list[GraphNode] = []
    emitted_gaps: set[str] = set()  # normalized citations already given a node

    for rec in records:
        digest_node_id = f"dig:{rec['id']}"
        case_id = rec["linked_case_law_id"]
        citation_key = rec["u_num"]

        if case_id is not None and str(case_id) in in_view:
            # The covered ruling is a precedent that is already in the view.
            target_id = f"cl:{case_id}"
        else:
            if not citation_key:
                # Nothing to point at: neither an in-view precedent nor a citation.
                continue
            target_id = f"gap:{citation_key}"
            if citation_key not in emitted_gaps:
                emitted_gaps.add(citation_key)
                raw_citation = (rec["underlying_citation"] or "").strip()
                gap_targets.append(
                    GraphNode(
                        id=target_id,
                        type="gap",
                        label=raw_citation or citation_key,
                        size=1,
                    )
                )

        # Prefer the concept tag as caption; otherwise fall back to the digest number.
        concept = (rec["concept_tag"] or "").strip()
        if concept:
            caption = concept
        elif rec["yomon_number"]:
            caption = f"יומון {rec['yomon_number']}"
        else:
            caption = "יומון"

        holding = (rec["headline_holding"] or "").strip()[:160]
        when = rec["underlying_date"] or rec["digest_date"]

        digests.append(
            GraphNode(
                id=digest_node_id,
                type="digest",
                label=caption[:48],
                note=holding or None,
                court=rec["underlying_court"] or None,
                date=when.isoformat() if when else None,
                practice_area=rec["practice_area"] or None,
                digest_id=str(rec["id"]),
            )
        )
        covers.append(GraphEdge(source=digest_node_id, target=target_id, type="covers"))

    return digests, covers, gap_targets
|
||||
|
||||
|
||||
async def _add_digests(
    conn: asyncpg.Connection,
    prec_ids: list,
    nodes: list[GraphNode],
    edges: list[GraphEdge],
) -> None:
    """Merge the digest layer into ``nodes`` / ``edges`` in place.

    Gap nodes synthesized for digest targets are appended only when no node
    with the same id is already in ``nodes``, so a node that was added
    earlier (for example by the gap layer) is kept as-is. Digest nodes and
    their ``covers`` edges are always appended.

    Args:
        conn: Open database connection, passed through to the digest query.
        prec_ids: Ids of the precedents currently in the view.
        nodes: Graph node list to extend (mutated).
        edges: Graph edge list to extend (mutated).
    """
    digest_nodes, cover_edges, gap_candidates = await _digest_nodes_and_edges(
        conn, prec_ids
    )

    known_ids = {node.id for node in nodes}
    for candidate in gap_candidates:
        if candidate.id in known_ids:
            # An existing node with this id wins over the synthesized one.
            continue
        nodes.append(candidate)
        known_ids.add(candidate.id)

    nodes.extend(digest_nodes)
    edges.extend(cover_edges)
|
||||
|
||||
|
||||
# ── Endpoints' core logic ────────────────────────────────────────────
|
||||
async def build_corpus_graph(
|
||||
pool: asyncpg.Pool,
|
||||
@@ -379,12 +473,15 @@ async def build_corpus_graph(
|
||||
|
||||
total_available = int(prec_rows[0]["total_available"]) if prec_rows else 0
|
||||
nodes = [_precedent_node(r) for r in prec_rows]
|
||||
prec_id_list = [r["id"] for r in prec_rows]
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
|
||||
nodes.extend(hub_nodes)
|
||||
if "gap" in types:
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, prec_id_list)
|
||||
nodes.extend(gap_nodes)
|
||||
edges.extend(gap_edges)
|
||||
if "digest" in types:
|
||||
await _add_digests(conn, prec_id_list, nodes, edges)
|
||||
|
||||
if metrics:
|
||||
_stamp_metrics(nodes, edges)
|
||||
@@ -516,12 +613,15 @@ async def build_node_neighborhood(
|
||||
ids,
|
||||
)
|
||||
nodes = [_precedent_node(r) for r in prec_rows]
|
||||
prec_id_list = [r["id"] for r in prec_rows]
|
||||
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types)
|
||||
nodes.extend(hub_nodes)
|
||||
if "gap" in forced_types:
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
|
||||
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, prec_id_list)
|
||||
nodes.extend(gap_nodes)
|
||||
edges.extend(gap_edges)
|
||||
if "digest" in forced_types:
|
||||
await _add_digests(conn, prec_id_list, nodes, edges)
|
||||
|
||||
return CorpusGraph(
|
||||
nodes=nodes,
|
||||
|
||||
Reference in New Issue
Block a user