feat(corroboration): orchestrator + persistence over both citation graphs (X11)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 19:04:20 +00:00
parent 33f955e372
commit b57e590275
2 changed files with 68 additions and 0 deletions

View File

@@ -6,9 +6,11 @@ same architectural rule as the other extractors (local MCP only).
"""
from __future__ import annotations
import logging
from uuid import UUID
from legal_mcp import config
from legal_mcp.config import parse_llm_json
from legal_mcp.services import claude_session
from legal_mcp.services import db, embeddings
logger = logging.getLogger(__name__)
@@ -75,3 +77,28 @@ async def classify_treatment(cited_citation: str, context: str) -> str:
logger.warning("classify_treatment failed: %s", e)
return "mentioned"
return _coerce_treatment(result if isinstance(result, dict) else {})
async def build_for_precedent(case_law_id: str | UUID) -> dict:
    """Corroborate one cited precedent from the citations that point at it.

    Every incoming citation with non-blank context is embedded, matched
    against the precedent via ``db.nearest_halacha_for_vector`` and, when
    ``accept_match`` accepts that match, classified and persisted with
    ``db.store_corroboration``. Citations without context, or whose match is
    rejected, are skipped.

    Args:
        case_law_id: Id of the cited precedent, as a ``UUID`` or its string
            form (strings are parsed with ``UUID(...)``).

    Returns:
        ``{"citations": <number of incoming citations fetched>,
        "linked": <number of citations stored>}``.
    """
    precedent_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
    incoming = await db.incoming_citations_for_precedent(precedent_id)

    stored = 0
    for citation in incoming:
        snippet = (citation.get("context") or "").strip()
        if snippet:
            # One embedding request per citation; only the first vector is used.
            embedded = await embeddings.embed_texts([snippet], input_type="query")
            match = await db.nearest_halacha_for_vector(precedent_id, embedded[0])
            halacha_id = accept_match(match)
            if halacha_id:
                citing_case = citation.get("citing_case_law_id")
                citing_decision = citation.get("citing_decision_id")
                # NOTE(review): classify_treatment's first parameter is named
                # `cited_citation`, yet the *citing* id is passed here --
                # confirm this is intended.
                treatment = await classify_treatment(
                    citing_case or citing_decision or "",
                    snippet,
                )
                await db.store_corroboration(
                    halacha_id,
                    citation["source_id"],
                    citing_case,
                    citing_decision,
                    treatment,
                    match[1],  # second element of the match is stored as the score
                    snippet,
                )
                stored += 1

    return {"citations": len(incoming), "linked": stored}

View File

@@ -3398,6 +3398,47 @@ async def nearest_halacha_for_vector(case_law_id: UUID, vec: list[float]) -> tup
return (row["id"], float(row["sim"])) if row else None
async def incoming_citations_for_precedent(case_law_id: UUID) -> list[dict]:
    """Fetch the citations pointing at a precedent from both citation tables.

    Rows from ``precedent_internal_citations`` (matched on
    ``cited_case_law_id``) and ``case_law_citations`` (matched on
    ``case_law_id``) are combined with ``UNION ALL``, so nothing is
    de-duplicated across the two tables.

    Args:
        case_law_id: Id of the cited precedent.

    Returns:
        One dict per citation with the keys ``source_id``,
        ``citing_case_law_id``, ``citing_decision_id`` and ``context``.
        The id columns are cast to text by the query; for any row exactly one
        of the two ``citing_*`` columns is populated by the query and the
        other is ``NULL``.
    """
    pool = await get_pool()
    # Column names come from the first SELECT; the second SELECT lines up
    # positionally with it.
    query = (
        "SELECT id::text AS source_id, source_case_law_id::text AS citing_case_law_id, "
        "       NULL::text AS citing_decision_id, match_context AS context "
        "FROM precedent_internal_citations WHERE cited_case_law_id = $1 "
        "UNION ALL "
        "SELECT id::text, NULL, decision_id::text, context_text "
        "FROM case_law_citations WHERE case_law_id = $1"
    )
    records = await pool.fetch(query, case_law_id)
    return list(map(dict, records))
async def store_corroboration(
    halacha_id: str,
    source_id: str,
    citing_case_law_id,
    citing_decision_id,
    treatment: str,
    score: float,
    context: str,
) -> None:
    """Upsert one corroboration row linking a halacha to a citing source.

    The row is keyed on ``(halacha_id, source_citation_id)``. On conflict the
    existing row's ``treatment``, ``match_score`` and ``match_context`` are
    all overwritten, so the stored context always matches the text the
    treatment and score were derived from.

    Args:
        halacha_id: Id of the matched halacha (UUID or its string form).
        source_id: Id of the originating citation row (UUID or its string
            form); written to ``source_citation_id``.
        citing_case_law_id: Id of the citing case-law record, or ``None``.
        citing_decision_id: Id of the citing decision, or ``None``.
        treatment: Treatment label to store.
        score: Match score to store.
        context: Citation context text to store.

    Raises:
        ValueError: If ``halacha_id`` or ``source_id`` is a string that is
            not a valid UUID, or a non-empty citing id string is malformed.
    """
    from uuid import UUID as _UUID

    pool = await get_pool()
    # asyncpg requires UUID objects for uuid-typed columns; convert strings.
    h_id = _UUID(halacha_id) if isinstance(halacha_id, str) else halacha_id
    s_id = _UUID(source_id) if isinstance(source_id, str) else source_id
    # The citing ids are optional: only non-empty strings are converted,
    # anything else (None, UUID objects) is passed through unchanged.
    cl_id = (
        _UUID(citing_case_law_id)
        if (citing_case_law_id and isinstance(citing_case_law_id, str))
        else citing_case_law_id
    )
    d_id = (
        _UUID(citing_decision_id)
        if (citing_decision_id and isinstance(citing_decision_id, str))
        else citing_decision_id
    )
    await pool.execute(
        "INSERT INTO halacha_citation_corroboration "
        "(halacha_id, citing_case_law_id, citing_decision_id, source_citation_id, treatment, match_score, match_context) "
        "VALUES ($1,$2,$3,$4,$5,$6,$7) "
        "ON CONFLICT (halacha_id, source_citation_id) DO UPDATE SET "
        # Refresh the context together with the values derived from it;
        # otherwise a re-run leaves a stale match_context next to a new
        # treatment and score.
        "treatment=EXCLUDED.treatment, match_score=EXCLUDED.match_score, "
        "match_context=EXCLUDED.match_context",
        h_id, cl_id, d_id, s_id, treatment, score, context,
    )
async def search_precedent_library_semantic(
query_embedding: list[float],
practice_area: str = "",