feat(graph): metadata filters + facets (corpus graph PR A)

Adds legal-metadata filtering and the payload to color by it (foundation for the color-by selector in the analytics PR). Backend (web/graph_api.py, web/app.py) — read-only, G2: - GraphNode += court, date (ISO) — precedents carry them for filter/color-by. - build_corpus_graph += server-side WHERE filters (G5): court, precedent_level, chair, district, year_from, year_to (EXTRACT(YEAR FROM date)). Neighborhood query also selects court/date. - New GET /api/graph/facets (response_model GraphFacets, UI2) → distinct courts/levels/chairs/districts so the UI doesn't hardcode Hebrew strings. Frontend: - graph.ts: GraphNode += court/date; GraphFilters += the six params; buildParams; useGraphFacets() hook. - graph-filter-panel: an "advanced" Accordion with court/precedent_level/chair/district Selects (from facets) + year-from/year-to Selects. - graph-view: new controls wired into filters; facets fetched and passed down. Verified read-only against the live DB (precedent_level=עליון&year_from=2015 filters correctly; facets populated: 36 courts / 3 levels / 19 chairs / 4 districts). web-ui build + lint pass. Invariants: G2 (SELECT-only via db.get_pool), G5 (filters server-side), UI2 (explicit response_models). api:types to be regenerated post-deploy. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 20:52:13 +00:00
parent bcd5fd5f8d
commit 8258f09228
5 changed files with 267 additions and 3 deletions
--- a/web/graph_api.py
+++ b/web/graph_api.py
@@ -60,9 +60,20 @@ class GraphNode(BaseModel):
    practice_area: str | None = None
    source_kind: str | None = None  # precedents only
    precedent_level: str | None = None  # precedents only
+    court: str | None = None  # precedents only — for color-by / filter
+    date: str | None = None  # precedents only — ISO date, for recency color/filter
    case_law_id: str | None = None  # canonical id for deep-link (precedents)


+class GraphFacets(BaseModel):
+    """Distinct filter values so the UI doesn't hardcode Hebrew enum strings."""
+
+    courts: list[str]  # distinct non-empty case_law.court values
+    precedent_levels: list[str]  # distinct non-empty case_law.precedent_level values
+    chairs: list[str]  # distinct non-empty case_law.chair_name values
+    districts: list[str]  # distinct non-empty case_law.district values
+
+
 class GraphEdge(BaseModel):
    source: str
    target: str
@@ -110,6 +121,8 @@ def _precedent_node(row: asyncpg.Record) -> GraphNode:
        practice_area=(row["practice_area"] or None),
        source_kind=(row["source_kind"] or None),
        precedent_level=(row["precedent_level"] or None),
+        court=(row["court"] or None),
+        date=(row["date"].isoformat() if row["date"] else None),
        case_law_id=str(row["id"]),
    )

@@ -224,12 +237,19 @@ async def build_corpus_graph(
    min_citations: int = 0,
    limit: int = NODE_CAP_DEFAULT,
    q: str = "",
+    court: str = "",
+    precedent_level: str = "",
+    chair: str = "",
+    district: str = "",
+    year_from: int = 0,
+    year_to: int = 0,
 ) -> CorpusGraph:
    """Assemble the full corpus graph under the given filters.

    The most-cited precedents always survive the cap (``ORDER BY size DESC``),
    so clipping never hides the structurally important nodes. ``truncated`` +
-    ``total_available`` let the UI prompt the user to narrow filters.
+    ``total_available`` let the UI prompt the user to narrow filters. All
+    filters are applied server-side in the WHERE clause (G5).
    """
    types = normalize_node_types(node_types)
    cap = max(1, min(int(limit), NODE_CAP_MAX))
@@ -241,6 +261,7 @@ async def build_corpus_graph(
            + """
            SELECT c.id, c.case_number, c.case_name,
                   c.practice_area, c.source_kind, c.precedent_level,
+                   c.court, c.date,
                   COALESCE(p.n, 0) AS size,
                   COUNT(*) OVER () AS total_available
            FROM case_law c
@@ -250,6 +271,12 @@ async def build_corpus_graph(
              AND COALESCE(p.n, 0) >= $3
              AND ($4 = '' OR c.case_number ILIKE '%' || $4 || '%'
                          OR c.case_name ILIKE '%' || $4 || '%')
+              AND ($6 = '' OR c.court = $6)
+              AND ($7 = '' OR c.precedent_level = $7)
+              AND ($8 = '' OR c.chair_name = $8)
+              AND ($9 = '' OR c.district = $9)
+              AND ($10 = 0 OR (c.date IS NOT NULL AND EXTRACT(YEAR FROM c.date) >= $10))
+              AND ($11 = 0 OR (c.date IS NOT NULL AND EXTRACT(YEAR FROM c.date) <= $11))
            ORDER BY COALESCE(p.n, 0) DESC, c.case_number
            LIMIT $5
            """,
@@ -258,6 +285,12 @@ async def build_corpus_graph(
            min_cit,
            q.strip(),
            cap,
+            court,
+            precedent_level,
+            chair,
+            district,
+            max(0, int(year_from)),
+            max(0, int(year_to)),
        )

        total_available = int(prec_rows[0]["total_available"]) if prec_rows else 0
@@ -366,6 +399,7 @@ async def build_node_neighborhood(
            + """
            SELECT c.id, c.case_number, c.case_name,
                   c.practice_area, c.source_kind, c.precedent_level,
+                   c.court, c.date,
                   COALESCE(p.n, 0) AS size
            FROM case_law c
            LEFT JOIN prec_indeg p ON p.id = c.id
@@ -383,3 +417,33 @@ async def build_node_neighborhood(
        truncated=truncated,
        total_available=len(nodes),
    )
+
+
+async def build_facets(pool: asyncpg.Pool) -> GraphFacets:
+    """Distinct, non-empty filter values from ``case_law`` for the UI dropdowns.
+
+    Keeps the closed-vs-open-enum problem server-side so the frontend never
+    hardcodes Hebrew court / chair strings (a UI1 source-of-truth concern).
+    """
+    async with pool.acquire() as conn:  # borrow one pooled connection for the single read
+        rows = await conn.fetch(  # UNION (not UNION ALL) de-duplicates values within each kind
+            """
+            SELECT 'court' AS kind, court AS v FROM case_law WHERE court <> ''
+            UNION
+            SELECT 'level', precedent_level FROM case_law WHERE precedent_level <> ''
+            UNION
+            SELECT 'chair', chair_name FROM case_law WHERE chair_name <> ''
+            UNION
+            SELECT 'district', district FROM case_law WHERE district <> ''
+            ORDER BY 1, 2
+            """
+        )  # "<> ''" also rejects NULL, so only non-empty strings come back
+    buckets: dict[str, list[str]] = {"court": [], "level": [], "chair": [], "district": []}
+    for r in rows:  # rows arrive ordered by (kind, v), so each list stays sorted
+        buckets[r["kind"]].append(r["v"])  # kind is always one of the four SQL labels
+    return GraphFacets(
+        courts=buckets["court"],
+        precedent_levels=buckets["level"],
+        chairs=buckets["chair"],
+        districts=buckets["district"],
+    )