feat(principles): canonical_statement synthesis service + throttled backfill (Phase E groundwork, #152)
Grounded (INV-AH) multi-instance synthesis with drift guard + chair gate (pending_review, G10). Single path used by backfill, MCP tool, nightly drain. HELD from production run pending the principles-redesign (rename+cull, #152). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6147,6 +6147,71 @@ async def update_canonical_statement(
|
||||
return result.split()[-1] != "0"
|
||||
|
||||
|
||||
async def fetch_canonical_synthesis_input(canonical_id: "UUID") -> "dict | None":
    """Load one canonical principle and its instances for the synthesis pass (V41 Phase 4).

    In contrast to the UI-facing :func:`get_canonical_halacha`, the result carries
    the canonical row's own ``embedding`` (converted to a plain python list, for the
    drift guard) together with the full text fields of every instance
    (``rule_statement``, ``supporting_quote``, ``reasoning_summary``) -- the
    grounding evidence the LLM rewrites from (INV-AH).

    Returns:
        A dict of the canonical row's selected columns plus an ``instances`` key
        (list of dicts, rows with ``instance_type='original'`` first, then ordered
        by case number), or ``None`` when no canonical row matches ``canonical_id``.
    """
    db = await get_pool()

    canonical_sql = (
        "SELECT id::text, canonical_statement, rule_type, practice_areas, "
        " subject_tags, review_status, instance_count, embedding "
        "FROM canonical_halachot WHERE id=$1"
    )
    canonical = await db.fetchrow(canonical_sql, canonical_id)
    if not canonical:
        return None

    instances_sql = (
        "SELECT h.instance_type, h.treatment, h.rule_statement, "
        " h.supporting_quote, h.reasoning_summary, "
        " cl.case_number, cl.case_name "
        "FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE h.canonical_id=$1 "
        "ORDER BY (h.instance_type='original') DESC, cl.case_number"
    )
    instance_rows = await db.fetch(instances_sql, canonical_id)

    payload = dict(canonical)
    vector = payload["embedding"]
    if vector is not None:
        # Materialise the driver's vector value as a plain list; a NULL
        # embedding is passed through untouched as None.
        payload["embedding"] = list(vector)
    payload["instances"] = [dict(record) for record in instance_rows]
    return payload
|
||||
|
||||
|
||||
async def apply_canonical_synthesis(
    canonical_id: "UUID",
    canonical_statement: str,
    embedding: "list[float] | None" = None,
    review_status: str = "pending_review",
) -> bool:
    """Persist the outcome of a synthesis run for one canonical (V41 Phase 4).

    A single UPDATE always writes ``canonical_statement`` and ``review_status``
    (defaulting to ``pending_review`` for the chair gate, G10/INV-LRN1) and bumps
    ``updated_at``. The ``embedding`` column is only touched when a value is
    supplied; passing ``None`` leaves the stored vector as-is, so the
    keep-original path after a drift-rejected or abstained synthesis does not
    have to re-embed.

    Returns:
        ``True`` when the last token of the status string returned by
        ``execute`` is not ``"0"`` (i.e. a row was updated), ``False`` otherwise.
    """
    db = await get_pool()

    if embedding is None:
        # No new vector: update only the statement and the review status.
        statement = (
            "UPDATE canonical_halachot "
            "SET canonical_statement=$2, review_status=$3, updated_at=now() "
            "WHERE id=$1"
        )
        params = (canonical_id, canonical_statement, review_status)
    else:
        statement = (
            "UPDATE canonical_halachot "
            "SET canonical_statement=$2, embedding=$3, review_status=$4, updated_at=now() "
            "WHERE id=$1"
        )
        params = (canonical_id, canonical_statement, embedding, review_status)

    status = await db.execute(statement, *params)
    affected = status.split()[-1]
    return affected != "0"
|
||||
|
||||
|
||||
async def list_canonical_instances(canonical_id: "UUID") -> list[dict]:
|
||||
"""List all halachot (instances) sharing a canonical_id — used by the UI accordion."""
|
||||
pool = await get_pool()
|
||||
|
||||
Reference in New Issue
Block a user