feat(graph): daily-digest (יומון) discovery layer (corpus graph PR E)

Chaim's idea: surface the downloaded "כל יום" digests in the graph. Each digest
COVERS the ruling it analyses — a corpus precedent when we have it (16), or a
synthesized gap node from its underlying_citation when we don't (269). So the
digest layer doubles as a discovery signal: it makes visible that the daily
feed overwhelmingly covers rulings NOT yet in the corpus.

Backend (web/graph_api.py — read-only, G2):
- "digest" added to VALID_NODE_TYPES (off by default).
- _digest_nodes_and_edges(): dig:<id> nodes from completed digests, `covers`
  edge → cl:precedent (linked_case_law_id in view) or → gap:<underlying_citation>
  (synthesized, deduped against the gap layer — real in-degree wins). Carries
  concept_tag (label), headline_holding (note), underlying_court/date.
- _add_digests() appends the layer with gap dedup. Wired into both build
  functions. GraphNode += note, digest_id. Gated via node_types (no app.py
  change). Validated: 16 covers→precedent, 269 covers→gap.

Frontend:
- graph.ts: GraphNodeType += "digest"; GraphEdgeType += "covers"; node fields.
- graph-filter-panel: toggle "יומונים (כל יום)" (off by default).
- graph-canvas: digest = teal node (r=4); `covers` edges teal.
- graph-node-panel: digest branch — concept + holding + court/date + link to
  /digests.

web-ui build + lint pass. Invariants: G2 (SELECT-only), UI2. api:types post-deploy.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 21:31:04 +00:00
parent 8dc0a268fb
commit fc5d69902f
6 changed files with 147 additions and 6 deletions

View File

@@ -38,7 +38,7 @@ from pydantic import BaseModel
from web import graph_metrics
# ── Node-type vocabulary ─────────────────────────────────────────────
# Every node type a caller may request. "digest" (daily-digest layer) is
# accepted here but is not part of DEFAULT_NODE_TYPES, so it is opt-in.
VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap", "digest"}
# Node types used when the caller does not specify any.
DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
# NOTE(review): presumably the default / maximum number of nodes a graph
# request may return — confirm against the endpoint code that reads them.
NODE_CAP_DEFAULT = 400
NODE_CAP_MAX = 1500
@@ -72,6 +72,9 @@ class GraphNode(BaseModel):
# Gap nodes only — research-gap status from missing_precedents (best-effort).
gap_status: str | None = None # open | uploaded | closed | irrelevant
missing_precedent_id: str | None = None
# Digest nodes only — the holding line from the daily יומון.
note: str | None = None
digest_id: str | None = None # for deep-link to /digests
class GraphFacets(BaseModel):
@@ -306,6 +309,97 @@ async def _gap_nodes_and_edges(
return nodes, edges
async def _digest_nodes_and_edges(
conn: asyncpg.Connection,
prec_ids: list,
) -> tuple[list[GraphNode], list[GraphEdge], list[GraphNode]]:
"""Daily-digest (יומון) discovery layer. Each digest ``covers`` the ruling
it analyses: a corpus precedent (``linked_case_law_id``) when we have it, or
a ``gap`` node synthesized from ``underlying_citation`` when we don't — so
the digest doubles as a research signal ("the feed flagged this ruling").
Returns (digest_nodes, covers_edges, gap_target_nodes). The caller dedups
gap nodes against the gap layer (real in-degree there wins over size=1)."""
digest_nodes: list[GraphNode] = []
edges: list[GraphEdge] = []
gap_nodes: list[GraphNode] = []
if not prec_ids:
return digest_nodes, edges, gap_nodes
prec_set = {str(x) for x in prec_ids}
rows = await conn.fetch(
"""
SELECT id, yomon_number, concept_tag, headline_holding,
underlying_citation, underlying_court, underlying_date,
digest_date, practice_area, linked_case_law_id,
regexp_replace(btrim(underlying_citation), '\\s+', ' ', 'g') AS u_num
FROM digests
WHERE extraction_status = 'completed'
AND (linked_case_law_id = ANY($1::uuid[])
OR (linked_case_law_id IS NULL AND btrim(underlying_citation) <> ''))
ORDER BY digest_date DESC NULLS LAST
LIMIT 400
""",
prec_ids,
)
seen_gap: set[str] = set()
for r in rows:
did = f"dig:{r['id']}"
linked = r["linked_case_law_id"]
if linked is not None and str(linked) in prec_set:
target = f"cl:{linked}"
elif r["u_num"]:
target = f"gap:{r['u_num']}"
if r["u_num"] not in seen_gap:
seen_gap.add(r["u_num"])
gap_nodes.append(
GraphNode(
id=target,
type="gap",
label=(r["underlying_citation"] or "").strip() or r["u_num"],
size=1,
)
)
else:
continue
label = (r["concept_tag"] or "").strip() or (
f"יומון {r['yomon_number']}" if r["yomon_number"] else "יומון"
)
d = r["underlying_date"] or r["digest_date"]
digest_nodes.append(
GraphNode(
id=did,
type="digest",
label=label[:48],
note=((r["headline_holding"] or "").strip()[:160] or None),
court=(r["underlying_court"] or None),
date=(d.isoformat() if d else None),
practice_area=(r["practice_area"] or None),
digest_id=str(r["id"]),
)
)
edges.append(GraphEdge(source=did, target=target, type="covers"))
return digest_nodes, edges, gap_nodes
async def _add_digests(
    conn: asyncpg.Connection,
    prec_ids: list,
    nodes: list[GraphNode],
    edges: list[GraphEdge],
) -> None:
    """Merge the digest layer into ``nodes`` and ``edges`` in place.

    Gap nodes that digests point at are appended only when no node with the
    same id is already in ``nodes``, so an entry contributed earlier (e.g. by
    the gap layer, with its real in-degree) is kept. Digest nodes and their
    ``covers`` edges are then appended unconditionally. Returns nothing; the
    two lists passed in are mutated.
    """
    digest_layer, cover_edges, gap_candidates = await _digest_nodes_and_edges(
        conn, prec_ids
    )

    known_ids = {node.id for node in nodes}
    for candidate in gap_candidates:
        if candidate.id in known_ids:
            # Already present — keep the existing node untouched.
            continue
        known_ids.add(candidate.id)
        nodes.append(candidate)

    # In-place list extension: the caller's lists must see the additions.
    nodes += digest_layer
    edges += cover_edges
# ── Endpoints' core logic ────────────────────────────────────────────
async def build_corpus_graph(
pool: asyncpg.Pool,
@@ -379,12 +473,15 @@ async def build_corpus_graph(
total_available = int(prec_rows[0]["total_available"]) if prec_rows else 0
nodes = [_precedent_node(r) for r in prec_rows]
prec_id_list = [r["id"] for r in prec_rows]
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
nodes.extend(hub_nodes)
if "gap" in types:
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, prec_id_list)
nodes.extend(gap_nodes)
edges.extend(gap_edges)
if "digest" in types:
await _add_digests(conn, prec_id_list, nodes, edges)
if metrics:
_stamp_metrics(nodes, edges)
@@ -516,12 +613,15 @@ async def build_node_neighborhood(
ids,
)
nodes = [_precedent_node(r) for r in prec_rows]
prec_id_list = [r["id"] for r in prec_rows]
hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types)
nodes.extend(hub_nodes)
if "gap" in forced_types:
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, prec_id_list)
nodes.extend(gap_nodes)
edges.extend(gap_edges)
if "digest" in forced_types:
await _add_digests(conn, prec_id_list, nodes, edges)
return CorpusGraph(
nodes=nodes,