feat(halachot): canonical lookup-before-insert + MCP tools (Phase 3+4, V41)
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 4s
Lint — undefined names / undefined-names (pull_request) Successful in 10s

store_halachot_for_chunk: לפני כל INSERT — חיפוש cosine ב-canonical_halachot (≥0.85).
  עיקרון קיים → instance_type='citation' (אין canonical חדש).
  עיקרון חדש → instance_type='original' + יצירת canonical אוטומטית + עדכון instance_count.

config: HALACHA_CANONICAL_LOOKUP_ENABLED=true, HALACHA_CANONICAL_THRESHOLD=0.85.

db.list_halachot: פרמטר instance_type חדש לסינון.
db.list_canonical_halachot: שאילתת רשימה לפי practice_area/status.
db.update_canonical_statement: עדכון ניסוח קנוני ע"י היו"ר.

tools/precedent_library.py:
  halachot_pending: ברירת-מחדל instance_type='original' (תור ריאלי).
  halacha_review: פרמטר canonical_statement חדש (עריכת ניסוח העיקרון).
  canonical_halacha_list: כלי MCP חדש — רשימת עקרונות קנוניים.
  canonical_halacha_get: כלי MCP חדש — עיקרון + אינסטנסים.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-17 17:36:09 +00:00
parent aba87737e3
commit 7c39c685e5
4 changed files with 203 additions and 10 deletions

View File

@@ -5320,6 +5320,24 @@ async def store_halachot_for_chunk(
and halacha_quality.FLAG_NEAR_DUPLICATE not in flags):
flags.append(halacha_quality.FLAG_NEAR_DUPLICATE)
# 3) V41 lookup-before-insert: does this principle already have a canonical?
# If yes → 'citation' instance linked to the existing canonical.
# If no → 'original' instance; a new canonical is created after INSERT.
canonical_id = None
instance_type = "original"
if emb is not None and config.HALACHA_CANONICAL_LOOKUP_ENABLED:
canon_match = await conn.fetchrow(
"SELECT id, 1 - (embedding <=> $1) AS sim "
"FROM canonical_halachot "
"WHERE embedding IS NOT NULL "
"ORDER BY embedding <=> $1 LIMIT 1",
emb,
)
if (canon_match
and float(canon_match["sim"]) >= config.HALACHA_CANONICAL_THRESHOLD):
canonical_id = canon_match["id"]
instance_type = "citation"
confidence = float(h.get("confidence", 0.0))
auto_approve = confidence >= threshold and not flags
review_status = "approved" if auto_approve else "pending_review"
@@ -5334,18 +5352,50 @@ async def store_halachot_for_chunk(
reasoning_summary, supporting_quote, page_reference,
practice_areas, subject_tags, cites, confidence,
quote_verified, quality_flags, embedding, review_status,
reviewer, reviewed_at)
reviewer, reviewed_at, canonical_id, instance_type)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
$12, $13, $14, $15, $16, {reviewed_at_clause})""",
$12, $13, $14, $15, $16, {reviewed_at_clause},
$17, $18)""",
case_law_id, base + inserted, h["rule_statement"],
h.get("rule_type", "interpretive"), h.get("reasoning_summary", ""),
h["supporting_quote"], h.get("page_reference", ""),
h.get("practice_areas", []), h.get("subject_tags", []),
h.get("cites", []), confidence, h.get("quote_verified", False),
flags, h.get("embedding"), review_status, reviewer,
flags, emb, review_status, reviewer,
canonical_id, instance_type,
)
existing_quotes.add(norm_quote)
inserted += 1
# V41: maintain canonical_halachot after successful insert.
if config.HALACHA_CANONICAL_LOOKUP_ENABLED:
if instance_type == "original":
# New principle — create canonical and link back.
new_canon_id = await conn.fetchval(
"INSERT INTO canonical_halachot "
"(canonical_statement, rule_type, practice_areas, subject_tags, "
" embedding, first_established_in, review_status, instance_count) "
"VALUES ($1,$2,$3,$4,$5,$6,'pending_synthesis',1) RETURNING id",
h.get("rule_statement") or "",
h.get("rule_type", "interpretive"),
h.get("practice_areas") or [],
h.get("subject_tags") or [],
emb,
case_law_id,
)
await conn.execute(
"UPDATE halachot SET canonical_id=$1 "
"WHERE case_law_id=$2 AND halacha_index=$3",
new_canon_id, case_law_id, base + inserted - 1,
)
elif canonical_id is not None:
# Citation of existing canonical — bump its instance count.
await conn.execute(
"UPDATE canonical_halachot SET "
"instance_count = instance_count + 1, updated_at = now() "
"WHERE id = $1",
canonical_id,
)
await conn.execute(
"UPDATE precedent_chunks SET halacha_extracted_at = now() "
"WHERE id = $1", chunk_id,
@@ -5362,6 +5412,7 @@ async def list_halachot(
case_law_id: UUID | None = None,
review_status: str | None = None,
practice_area: str | None = None,
instance_type: str | None = None,
limit: int = 200,
offset: int = 0,
exclude_low_quality: bool = False,
@@ -5407,6 +5458,10 @@ async def list_halachot(
conditions.append(f"${idx} = ANY(h.practice_areas)")
params.append(practice_area)
idx += 1
if instance_type:
conditions.append(f"h.instance_type = ${idx}")
params.append(instance_type)
idx += 1
if exclude_low_quality:
# a clean item has an empty/NULL quality_flags array
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
@@ -6034,6 +6089,51 @@ async def get_canonical_halacha(canonical_id: "UUID") -> "dict | None":
}
async def list_canonical_halachot(
    practice_area: str | None = None,
    review_status: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> list[dict]:
    """Return one page of rows from ``canonical_halachot`` as plain dicts.

    Args:
        practice_area: When truthy, keep only rows whose ``practice_areas``
            array contains this value. Skipped when ``None`` or empty.
        review_status: When truthy, keep only rows whose ``review_status``
            equals this value. Skipped when ``None`` or empty.
        limit: Value bound to the SQL ``LIMIT`` placeholder.
        offset: Value bound to the SQL ``OFFSET`` placeholder.

    Returns:
        One dict per row with the selected columns (``id`` is cast to text),
        ordered by ``instance_count`` descending, then ``created_at``
        descending.
    """
    pool = await get_pool()

    # "1=1" keeps the WHERE clause valid when no filter is supplied.
    where_parts = ["1=1"]
    args: list = []

    # Each placeholder number is the 1-based position of the argument that
    # was just appended, so numbering stays in sync with `args`.
    if practice_area:
        args.append(practice_area)
        where_parts.append(f"${len(args)} = ANY(practice_areas)")
    if review_status:
        args.append(review_status)
        where_parts.append(f"review_status = ${len(args)}")

    # LIMIT and OFFSET always occupy the two positions after the filters.
    limit_pos = len(args) + 1
    offset_pos = limit_pos + 1
    where_sql = " AND ".join(where_parts)

    query = (
        "SELECT id::text, canonical_statement, rule_type, practice_areas, "
        " subject_tags, review_status, instance_count, created_at, updated_at "
        "FROM canonical_halachot "
        f"WHERE {where_sql} "
        "ORDER BY instance_count DESC, created_at DESC "
        f"LIMIT ${limit_pos} OFFSET ${offset_pos}"
    )
    records = await pool.fetch(query, *args, limit, offset)
    return [dict(record) for record in records]
async def update_canonical_statement(
    canonical_id: "UUID", canonical_statement: str,
) -> bool:
    """Overwrite the statement text of one ``canonical_halachot`` row.

    Also sets the row's ``updated_at`` to ``now()``.

    Args:
        canonical_id: Value matched against the ``id`` column.
        canonical_statement: New text for the ``canonical_statement`` column.

    Returns:
        ``False`` when the last whitespace-separated token of the status
        returned by ``pool.execute`` is ``"0"``; ``True`` otherwise.
    """
    pool = await get_pool()
    update_sql = (
        "UPDATE canonical_halachot SET canonical_statement=$2, updated_at=now() "
        "WHERE id=$1"
    )
    status = await pool.execute(update_sql, canonical_id, canonical_statement)

    # NOTE(review): assumes the status is a command tag such as "UPDATE 1",
    # whose final token is the affected-row count — confirm against the driver.
    tokens = status.split()
    affected = tokens[-1]
    return affected != "0"
async def _annotate_equivalents(pool, out: list[dict]) -> None:
"""Attach an `equivalents` list to each row (#84.2) — parallel-authority links.