feat(principles): canonical_statement synthesis service + throttled backfill (Phase E groundwork, #152)

Adds grounded (INV-AH) multi-instance synthesis with a drift guard and a chair gate
(results land in pending_review, G10). A single code path is shared by the backfill, the MCP tool, and the nightly drain.
HELD from production runs pending the principles redesign (rename + cull, #152).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-19 10:57:48 +00:00
parent db93735ed6
commit 338a8a947f
14 changed files with 1250 additions and 74 deletions

View File

@@ -6147,6 +6147,71 @@ async def update_canonical_statement(
return result.split()[-1] != "0"
async def fetch_canonical_synthesis_input(canonical_id: "UUID") -> "dict | None":
    """Load the inputs the canonical-synthesis pass needs for one principle (V41 Phase 4).

    In contrast to the UI-facing :func:`get_canonical_halacha`, the result carries
    the canonical's own ``embedding`` (passed through ``list()`` for the drift
    guard) and, for every attached instance, the full text fields
    (``rule_statement``, ``supporting_quote``, ``reasoning_summary``) that serve
    as grounding evidence for the LLM rewrite (INV-AH).

    Args:
        canonical_id: Primary key of the ``canonical_halachot`` row to load.

    Returns:
        ``None`` when no canonical row matches. Otherwise a dict of the selected
        canonical columns plus an ``instances`` key holding one dict per
        ``halachot`` row joined to its ``case_law`` entry, with
        ``instance_type='original'`` rows first and then ordered by case number.
    """
    canonical_sql = (
        "SELECT id::text, canonical_statement, rule_type, practice_areas, "
        " subject_tags, review_status, instance_count, embedding "
        "FROM canonical_halachot WHERE id=$1"
    )
    instances_sql = (
        "SELECT h.instance_type, h.treatment, h.rule_statement, "
        " h.supporting_quote, h.reasoning_summary, "
        " cl.case_number, cl.case_name "
        "FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE h.canonical_id=$1 "
        "ORDER BY (h.instance_type='original') DESC, cl.case_number"
    )

    pool = await get_pool()

    canonical_row = await pool.fetchrow(canonical_sql, canonical_id)
    if not canonical_row:
        # Unknown canonical: skip the instance query entirely.
        return None

    instance_rows = await pool.fetch(instances_sql, canonical_id)

    payload = dict(canonical_row)
    vector = payload["embedding"]
    if vector is not None:
        # Hand callers a plain list rather than the driver's value type.
        payload["embedding"] = list(vector)
    payload["instances"] = [dict(record) for record in instance_rows]
    return payload
async def apply_canonical_synthesis(
    canonical_id: "UUID",
    canonical_statement: str,
    embedding: "list[float] | None" = None,
    review_status: str = "pending_review",
) -> bool:
    """Persist a synthesis outcome for one canonical in a single UPDATE (V41 Phase 4).

    ``canonical_statement``, ``review_status`` and ``updated_at`` are written on
    every call; ``review_status`` defaults to ``pending_review`` for the chair
    gate (G10/INV-LRN1). The stored embedding is only overwritten when a new one
    is supplied, so the keep-original path taken after a drift-rejected or
    abstained synthesis does not have to re-embed.

    Args:
        canonical_id: Primary key of the ``canonical_halachot`` row to update.
        canonical_statement: Statement text to store.
        embedding: New embedding to store, or ``None`` to leave the current one.
        review_status: Review status to store.

    Returns:
        ``True`` if the row existed (the statement's reported row count was not
        ``"0"``), ``False`` otherwise.
    """
    if embedding is not None:
        statement = (
            "UPDATE canonical_halachot "
            "SET canonical_statement=$2, embedding=$3, review_status=$4, updated_at=now() "
            "WHERE id=$1"
        )
        arguments = (canonical_id, canonical_statement, embedding, review_status)
    else:
        statement = (
            "UPDATE canonical_halachot "
            "SET canonical_statement=$2, review_status=$3, updated_at=now() "
            "WHERE id=$1"
        )
        arguments = (canonical_id, canonical_statement, review_status)

    pool = await get_pool()
    status = await pool.execute(statement, *arguments)

    # The command status ends with the affected-row count, e.g. "UPDATE 1".
    affected = status.split()[-1]
    return affected != "0"
async def list_canonical_instances(canonical_id: "UUID") -> list[dict]:
"""List all halachot (instances) sharing a canonical_id — used by the UI accordion."""
pool = await get_pool()