feat(graph): daily-digest (יומון) discovery layer (corpus graph PR E)

Chaim's idea: surface the downloaded "כל יום" digests in the graph. Each digest COVERS the ruling it analyses — a corpus precedent when we have it (16), or a synthesized gap node from its underlying_citation when we don't (269). So the digest layer doubles as a discovery signal: it makes visible that the daily feed overwhelmingly covers rulings NOT yet in the corpus.

Backend (web/graph_api.py — read-only, G2):
- "digest" added to VALID_NODE_TYPES (off by default).
- _digest_nodes_and_edges(): dig:<id> nodes from completed digests, `covers` edge → cl:precedent (linked_case_law_id in view) or → gap:<underlying_citation> (synthesized, deduped against the gap layer — real in-degree wins). Carries concept_tag (label), headline_holding (note), underlying_court/date.
- _add_digests() appends the layer with gap dedup. Wired into both build functions. GraphNode += note, digest_id. Gated via node_types (no app.py change). Validated: 16 covers→precedent, 269 covers→gap.

Frontend:
- graph.ts: GraphNodeType += "digest"; GraphEdgeType += "covers"; node fields.
- graph-filter-panel: toggle "יומונים (כל יום)" (off by default).
- graph-canvas: digest = teal node (r=4); `covers` edges teal.
- graph-node-panel: digest branch — concept + holding + court/date + link to /digests.

web-ui build + lint pass. Invariants: G2 (SELECT-only), UI2. api:types post-deploy.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 21:31:04 +00:00
parent 8dc0a268fb
commit fc5d69902f
6 changed files with 147 additions and 6 deletions
--- a/web/graph_api.py
+++ b/web/graph_api.py
@@ -38,7 +38,7 @@ from pydantic import BaseModel
 from web import graph_metrics

 # ── Node-type vocabulary ─────────────────────────────────────────────
-VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap"}
+VALID_NODE_TYPES = {"precedent", "halacha", "topic", "practice_area", "gap", "digest"}
 DEFAULT_NODE_TYPES = ("precedent", "topic", "practice_area")
 NODE_CAP_DEFAULT = 400
 NODE_CAP_MAX = 1500
@@ -72,6 +72,9 @@ class GraphNode(BaseModel):
    # Gap nodes only — research-gap status from missing_precedents (best-effort).
    gap_status: str | None = None  # open | uploaded | closed | irrelevant
    missing_precedent_id: str | None = None
+    # Digest nodes only — the holding line from the daily יומון.
+    note: str | None = None
+    digest_id: str | None = None  # for deep-link to /digests


 class GraphFacets(BaseModel):
@@ -306,6 +309,97 @@ async def _gap_nodes_and_edges(
    return nodes, edges


+async def _digest_nodes_and_edges(
+    conn: asyncpg.Connection,
+    prec_ids: list,
+) -> tuple[list[GraphNode], list[GraphEdge], list[GraphNode]]:
+    """Build the daily-digest (yomon) layer: each completed digest ``covers``
+    the ruling it analyses — the corpus precedent named by
+    ``linked_case_law_id`` when that id is in ``prec_ids``, otherwise a ``gap``
+    node keyed by the whitespace-normalized ``underlying_citation``.
+
+    Returns (digest_nodes, covers_edges, gap_target_nodes); all three are empty
+    when ``prec_ids`` is empty. Gap nodes are size=1; the caller dedups them."""
+    digest_nodes: list[GraphNode] = []
+    edges: list[GraphEdge] = []
+    gap_nodes: list[GraphNode] = []
+    if not prec_ids:  # no precedents in view -> no digest layer at all
+        return digest_nodes, edges, gap_nodes
+    prec_set = {str(x) for x in prec_ids}  # ids as strings for membership tests
+
+    rows = await conn.fetch(
+        """
+        SELECT id, yomon_number, concept_tag, headline_holding,
+               underlying_citation, underlying_court, underlying_date,
+               digest_date, practice_area, linked_case_law_id,
+               regexp_replace(btrim(underlying_citation), '\\s+', ' ', 'g') AS u_num
+        FROM digests
+        WHERE extraction_status = 'completed'
+          AND (linked_case_law_id = ANY($1::uuid[])
+               OR (linked_case_law_id IS NULL AND btrim(underlying_citation) <> ''))
+        ORDER BY digest_date DESC NULLS LAST
+        LIMIT 400
+        """,
+        prec_ids,
+    )
+    seen_gap: set[str] = set()  # normalized citations that already have a gap node
+    for r in rows:
+        did = f"dig:{r['id']}"
+        linked = r["linked_case_law_id"]
+        if linked is not None and str(linked) in prec_set:  # covers an in-view precedent
+            target = f"cl:{linked}"
+        elif r["u_num"]:  # no in-view precedent: point at a gap node for the citation
+            target = f"gap:{r['u_num']}"
+            if r["u_num"] not in seen_gap:  # emit each gap node once per citation
+                seen_gap.add(r["u_num"])
+                gap_nodes.append(
+                    GraphNode(
+                        id=target,
+                        type="gap",
+                        label=(r["underlying_citation"] or "").strip() or r["u_num"],
+                        size=1,
+                    )
+                )
+        else:
+            continue  # nothing to cover: drop this digest entirely
+        label = (r["concept_tag"] or "").strip() or (  # fallback when concept_tag is blank
+            f"יומון {r['yomon_number']}" if r["yomon_number"] else "יומון"
+        )
+        d = r["underlying_date"] or r["digest_date"]  # ruling date first, else digest date
+        digest_nodes.append(
+            GraphNode(
+                id=did,
+                type="digest",
+                label=label[:48],  # label capped at 48 characters
+                note=((r["headline_holding"] or "").strip()[:160] or None),  # blank -> None
+                court=(r["underlying_court"] or None),
+                date=(d.isoformat() if d else None),
+                practice_area=(r["practice_area"] or None),
+                digest_id=str(r["id"]),
+            )
+        )
+        edges.append(GraphEdge(source=did, target=target, type="covers"))
+    return digest_nodes, edges, gap_nodes
+
+
+async def _add_digests(
+    conn: asyncpg.Connection,
+    prec_ids: list,
+    nodes: list[GraphNode],
+    edges: list[GraphEdge],
+) -> None:
+    """Append the digest layer to ``nodes``/``edges`` in place. A digest-target gap
+    node is added only when no node with that id is present, so existing nodes win."""
+    dig_nodes, dig_edges, gap_targets = await _digest_nodes_and_edges(conn, prec_ids)
+    existing = {n.id for n in nodes}  # ids already in the graph before this layer
+    for g in gap_targets:
+        if g.id not in existing:
+            nodes.append(g)
+            existing.add(g.id)  # also guards against repeats within gap_targets
+    nodes.extend(dig_nodes)  # digest nodes and covers edges are appended unconditionally
+    edges.extend(dig_edges)
+
 # ── Endpoints' core logic ────────────────────────────────────────────
 async def build_corpus_graph(
    pool: asyncpg.Pool,
@@ -379,12 +473,15 @@ async def build_corpus_graph(

        total_available = int(prec_rows[0]["total_available"]) if prec_rows else 0
        nodes = [_precedent_node(r) for r in prec_rows]
+        prec_id_list = [r["id"] for r in prec_rows]
        hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, types)
        nodes.extend(hub_nodes)
        if "gap" in types:
-            gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
+            gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, prec_id_list)
            nodes.extend(gap_nodes)
            edges.extend(gap_edges)
+        if "digest" in types:
+            await _add_digests(conn, prec_id_list, nodes, edges)

    if metrics:
        _stamp_metrics(nodes, edges)
@@ -516,12 +613,15 @@ async def build_node_neighborhood(
            ids,
        )
        nodes = [_precedent_node(r) for r in prec_rows]
+        prec_id_list = [r["id"] for r in prec_rows]
        hub_nodes, edges = await _edges_and_hubs(conn, prec_rows, forced_types)
        nodes.extend(hub_nodes)
        if "gap" in forced_types:
-            gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, [r["id"] for r in prec_rows])
+            gap_nodes, gap_edges = await _gap_nodes_and_edges(conn, prec_id_list)
            nodes.extend(gap_nodes)
            edges.extend(gap_edges)
+        if "digest" in forced_types:
+            await _add_digests(conn, prec_id_list, nodes, edges)

    return CorpusGraph(
        nodes=nodes,