From b57e59027555c292545888cfa8205afa5e499879 Mon Sep 17 00:00:00 2001
From: Chaim
Date: Sun, 31 May 2026 19:04:20 +0000
Subject: [PATCH] feat(corroboration): orchestrator + persistence over both citation graphs (X11)

Co-Authored-By: Claude Sonnet 4.6
---
 .../src/legal_mcp/services/corroboration.py   | 52 ++++++++++++++
 mcp-server/src/legal_mcp/services/db.py       | 68 +++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/mcp-server/src/legal_mcp/services/corroboration.py b/mcp-server/src/legal_mcp/services/corroboration.py
index 9cfec9d..e4788f9 100644
--- a/mcp-server/src/legal_mcp/services/corroboration.py
+++ b/mcp-server/src/legal_mcp/services/corroboration.py
@@ -6,9 +6,11 @@ same architectural rule as the other extractors (local MCP only).
 """
 from __future__ import annotations
 import logging
+from uuid import UUID
 from legal_mcp import config
 from legal_mcp.config import parse_llm_json
 from legal_mcp.services import claude_session
+from legal_mcp.services import db, embeddings
 
 logger = logging.getLogger(__name__)
 
@@ -75,3 +77,53 @@ async def classify_treatment(cited_citation: str, context: str) -> str:
         logger.warning("classify_treatment failed: %s", e)
         return "mentioned"
     return _coerce_treatment(result if isinstance(result, dict) else {})
+
+
+async def build_for_precedent(case_law_id: str | UUID) -> dict:
+    """Corroborate one cited precedent against its incoming citations.
+
+    For each incoming citation with a non-empty context: embed the context,
+    look up the nearest halacha of this precedent and, when ``accept_match``
+    returns an id for that candidate, classify the treatment and upsert the
+    link through ``db.store_corroboration`` (an upsert, so re-runs are idempotent).
+
+    Args:
+        case_law_id: Id of the cited precedent (UUID or its string form).
+
+    Returns:
+        ``{"citations": n_incoming, "linked": n_stored}``.
+
+    Raises:
+        ValueError: If ``case_law_id`` is a string that is not a valid UUID.
+    """
+    if isinstance(case_law_id, str):
+        case_law_id = UUID(case_law_id)
+    cits = await db.incoming_citations_for_precedent(case_law_id)
+    linked = 0
+    for c in cits:
+        ctx = (c.get("context") or "").strip()
+        if not ctx:
+            continue
+        vecs = await embeddings.embed_texts([ctx], input_type="query")
+        if not vecs:
+            # Skip rather than crash on vecs[0] when nothing was embedded.
+            logger.warning(
+                "build_for_precedent: no embedding for citation %s",
+                c.get("source_id"),
+            )
+            continue
+        best = await db.nearest_halacha_for_vector(case_law_id, vecs[0])
+        halacha_id = accept_match(best)
+        if not halacha_id:
+            continue
+        # NOTE(review): classify_treatment's first parameter is named
+        # `cited_citation`, but this passes the *citing* document's id -- confirm.
+        citing_ref = c.get("citing_case_law_id") or c.get("citing_decision_id") or ""
+        treatment = await classify_treatment(citing_ref, ctx)
+        await db.store_corroboration(
+            halacha_id, c["source_id"],
+            c.get("citing_case_law_id"), c.get("citing_decision_id"),
+            treatment, best[1], ctx,
+        )
+        linked += 1
+    return {"citations": len(cits), "linked": linked}
diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py
index 1c4150d..d2ee6df 100644
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -3398,6 +3398,74 @@ async def nearest_halacha_for_vector(case_law_id: UUID, vec: list[float]) -> tup
     return (row["id"], float(row["sim"])) if row else None
 
 
+async def incoming_citations_for_precedent(case_law_id: UUID) -> list[dict]:
+    """Return every incoming citation of a precedent, from both citation graphs.
+
+    Each dict has the keys ``source_id``, ``citing_case_law_id``,
+    ``citing_decision_id`` and ``context`` (ids rendered as text). Rows from
+    ``precedent_internal_citations`` carry ``citing_case_law_id``; rows from
+    ``case_law_citations`` carry ``citing_decision_id``; the other is NULL.
+    """
+    pool = await get_pool()
+    rows = await pool.fetch(
+        "SELECT id::text AS source_id, source_case_law_id::text AS citing_case_law_id, "
+        "       NULL::text AS citing_decision_id, match_context AS context "
+        "FROM precedent_internal_citations WHERE cited_case_law_id = $1 "
+        "UNION ALL "
+        "SELECT id::text, NULL, decision_id::text, context_text "
+        "FROM case_law_citations WHERE case_law_id = $1",
+        case_law_id,
+    )
+    return [dict(r) for r in rows]
+
+
+async def store_corroboration(
+    halacha_id: str,
+    source_id: str,
+    citing_case_law_id,
+    citing_decision_id,
+    treatment: str,
+    score: float,
+    context: str,
+) -> None:
+    """Upsert one halacha/citation corroboration row.
+
+    Keyed on ``(halacha_id, source_citation_id)``. On conflict the treatment,
+    match score and match context are refreshed together, so the stored
+    context always corresponds to the stored score.
+
+    Args:
+        halacha_id: Matched halacha id (UUID or its string form).
+        source_id: Id of the source citation row (UUID or its string form).
+        citing_case_law_id: Citing case-law id, or None/"" when absent.
+        citing_decision_id: Citing decision id, or None/"" when absent.
+        treatment: Treatment label to store.
+        score: Match score to store.
+        context: Citation context text to store.
+    """
+    from uuid import UUID as _UUID
+
+    def _required(value):
+        # Bind uuid-typed columns as UUID objects (string ids are converted).
+        return _UUID(value) if isinstance(value, str) else value
+
+    def _optional(value):
+        # Empty values become NULL rather than an invalid-UUID bind error.
+        return _required(value) if value else None
+
+    pool = await get_pool()
+    await pool.execute(
+        "INSERT INTO halacha_citation_corroboration "
+        "(halacha_id, citing_case_law_id, citing_decision_id, source_citation_id, treatment, match_score, match_context) "
+        "VALUES ($1,$2,$3,$4,$5,$6,$7) "
+        "ON CONFLICT (halacha_id, source_citation_id) DO UPDATE SET "
+        "treatment=EXCLUDED.treatment, match_score=EXCLUDED.match_score, "
+        "match_context=EXCLUDED.match_context",
+        _required(halacha_id), _optional(citing_case_law_id), _optional(citing_decision_id),
+        _required(source_id), treatment, score, context,
+    )
+
+
 async def search_precedent_library_semantic(
     query_embedding: list[float],
     practice_area: str = "",