feat(halachot): canonical lookup-before-insert + MCP tools (Phase 3+4, V41)
store_halachot_for_chunk: לפני כל INSERT — חיפוש cosine ב-canonical_halachot (≥0.85). עיקרון קיים → instance_type='citation' (אין canonical חדש). עיקרון חדש → instance_type='original' + יצירת canonical אוטומטית + עדכון instance_count. config: HALACHA_CANONICAL_LOOKUP_ENABLED=true, HALACHA_CANONICAL_THRESHOLD=0.85. db.list_halachot: פרמטר instance_type חדש לסינון. db.list_canonical_halachot: שאילתת רשימה לפי practice_area/status. db.update_canonical_statement: עדכון ניסוח קנוני ע"י היו"ר. tools/precedent_library.py: halachot_pending: ברירת-מחדל instance_type='original' (תור ריאלי). halacha_review: פרמטר canonical_statement חדש (עריכת ניסוח העיקרון). canonical_halacha_list: כלי MCP חדש — רשימת עקרונות קנוניים. canonical_halacha_get: כלי MCP חדש — עיקרון + אינסטנסים. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5320,6 +5320,24 @@ async def store_halachot_for_chunk(
|
||||
and halacha_quality.FLAG_NEAR_DUPLICATE not in flags):
|
||||
flags.append(halacha_quality.FLAG_NEAR_DUPLICATE)
|
||||
|
||||
# 3) V41 lookup-before-insert: does this principle already have a canonical?
|
||||
# If yes → 'citation' instance linked to the existing canonical.
|
||||
# If no → 'original' instance; a new canonical is created after INSERT.
|
||||
canonical_id = None
|
||||
instance_type = "original"
|
||||
if emb is not None and config.HALACHA_CANONICAL_LOOKUP_ENABLED:
|
||||
canon_match = await conn.fetchrow(
|
||||
"SELECT id, 1 - (embedding <=> $1) AS sim "
|
||||
"FROM canonical_halachot "
|
||||
"WHERE embedding IS NOT NULL "
|
||||
"ORDER BY embedding <=> $1 LIMIT 1",
|
||||
emb,
|
||||
)
|
||||
if (canon_match
|
||||
and float(canon_match["sim"]) >= config.HALACHA_CANONICAL_THRESHOLD):
|
||||
canonical_id = canon_match["id"]
|
||||
instance_type = "citation"
|
||||
|
||||
confidence = float(h.get("confidence", 0.0))
|
||||
auto_approve = confidence >= threshold and not flags
|
||||
review_status = "approved" if auto_approve else "pending_review"
|
||||
@@ -5334,18 +5352,50 @@ async def store_halachot_for_chunk(
|
||||
reasoning_summary, supporting_quote, page_reference,
|
||||
practice_areas, subject_tags, cites, confidence,
|
||||
quote_verified, quality_flags, embedding, review_status,
|
||||
reviewer, reviewed_at)
|
||||
reviewer, reviewed_at, canonical_id, instance_type)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
|
||||
$12, $13, $14, $15, $16, {reviewed_at_clause})""",
|
||||
$12, $13, $14, $15, $16, {reviewed_at_clause},
|
||||
$17, $18)""",
|
||||
case_law_id, base + inserted, h["rule_statement"],
|
||||
h.get("rule_type", "interpretive"), h.get("reasoning_summary", ""),
|
||||
h["supporting_quote"], h.get("page_reference", ""),
|
||||
h.get("practice_areas", []), h.get("subject_tags", []),
|
||||
h.get("cites", []), confidence, h.get("quote_verified", False),
|
||||
flags, h.get("embedding"), review_status, reviewer,
|
||||
flags, emb, review_status, reviewer,
|
||||
canonical_id, instance_type,
|
||||
)
|
||||
existing_quotes.add(norm_quote)
|
||||
inserted += 1
|
||||
|
||||
# V41: maintain canonical_halachot after successful insert.
|
||||
if config.HALACHA_CANONICAL_LOOKUP_ENABLED:
|
||||
if instance_type == "original":
|
||||
# New principle — create canonical and link back.
|
||||
new_canon_id = await conn.fetchval(
|
||||
"INSERT INTO canonical_halachot "
|
||||
"(canonical_statement, rule_type, practice_areas, subject_tags, "
|
||||
" embedding, first_established_in, review_status, instance_count) "
|
||||
"VALUES ($1,$2,$3,$4,$5,$6,'pending_synthesis',1) RETURNING id",
|
||||
h.get("rule_statement") or "",
|
||||
h.get("rule_type", "interpretive"),
|
||||
h.get("practice_areas") or [],
|
||||
h.get("subject_tags") or [],
|
||||
emb,
|
||||
case_law_id,
|
||||
)
|
||||
await conn.execute(
|
||||
"UPDATE halachot SET canonical_id=$1 "
|
||||
"WHERE case_law_id=$2 AND halacha_index=$3",
|
||||
new_canon_id, case_law_id, base + inserted - 1,
|
||||
)
|
||||
elif canonical_id is not None:
|
||||
# Citation of existing canonical — bump its instance count.
|
||||
await conn.execute(
|
||||
"UPDATE canonical_halachot SET "
|
||||
"instance_count = instance_count + 1, updated_at = now() "
|
||||
"WHERE id = $1",
|
||||
canonical_id,
|
||||
)
|
||||
await conn.execute(
|
||||
"UPDATE precedent_chunks SET halacha_extracted_at = now() "
|
||||
"WHERE id = $1", chunk_id,
|
||||
@@ -5362,6 +5412,7 @@ async def list_halachot(
|
||||
case_law_id: UUID | None = None,
|
||||
review_status: str | None = None,
|
||||
practice_area: str | None = None,
|
||||
instance_type: str | None = None,
|
||||
limit: int = 200,
|
||||
offset: int = 0,
|
||||
exclude_low_quality: bool = False,
|
||||
@@ -5407,6 +5458,10 @@ async def list_halachot(
|
||||
conditions.append(f"${idx} = ANY(h.practice_areas)")
|
||||
params.append(practice_area)
|
||||
idx += 1
|
||||
if instance_type:
|
||||
conditions.append(f"h.instance_type = ${idx}")
|
||||
params.append(instance_type)
|
||||
idx += 1
|
||||
if exclude_low_quality:
|
||||
# a clean item has an empty/NULL quality_flags array
|
||||
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
|
||||
@@ -6034,6 +6089,51 @@ async def get_canonical_halacha(canonical_id: "UUID") -> "dict | None":
|
||||
}
|
||||
|
||||
|
||||
async def list_canonical_halachot(
    practice_area: str | None = None,
    review_status: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> list[dict]:
    """Return one page of rows from ``canonical_halachot``.

    Args:
        practice_area: When truthy, keep only rows whose ``practice_areas``
            array contains this value.
        review_status: When truthy, keep only rows whose ``review_status``
            equals this value.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        A list of plain dicts (``id`` is cast to text in SQL), ordered by
        ``instance_count`` descending, then ``created_at`` descending.
    """
    # "1=1" keeps the WHERE clause valid when no filter is supplied.
    filters = ["1=1"]
    args: list = []

    # Each placeholder number is the 1-based position of the argument that
    # was just appended, so numbering follows the argument list itself.
    if practice_area:
        args.append(practice_area)
        filters.append(f"${len(args)} = ANY(practice_areas)")
    if review_status:
        args.append(review_status)
        filters.append(f"review_status = ${len(args)}")

    where_clause = " AND ".join(filters)
    limit_pos = len(args) + 1
    offset_pos = limit_pos + 1

    query = (
        "SELECT id::text, canonical_statement, rule_type, practice_areas, "
        " subject_tags, review_status, instance_count, created_at, updated_at "
        "FROM canonical_halachot "
        f"WHERE {where_clause} "
        "ORDER BY instance_count DESC, created_at DESC "
        f"LIMIT ${limit_pos} OFFSET ${offset_pos}"
    )

    pool = await get_pool()
    records = await pool.fetch(query, *args, limit, offset)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
async def update_canonical_statement(
    canonical_id: "UUID", canonical_statement: str,
) -> bool:
    """Overwrite the statement text of one canonical principle.

    Sets ``canonical_statement`` and refreshes ``updated_at`` on the
    ``canonical_halachot`` row whose ``id`` equals ``canonical_id``.

    Args:
        canonical_id: Primary key of the row to update.
        canonical_statement: New statement text to store.

    Returns:
        ``True`` when the last token of the command status is not ``"0"``,
        ``False`` otherwise.
    """
    pool = await get_pool()
    status = await pool.execute(
        "UPDATE canonical_halachot SET canonical_statement=$2, updated_at=now() "
        "WHERE id=$1",
        canonical_id, canonical_statement,
    )
    # NOTE(review): presumably the driver returns a status tag such as
    # "UPDATE 1", whose last token is the affected-row count — confirm
    # against the pool implementation behind get_pool().
    affected = status.split()[-1]
    return affected != "0"
|
||||
|
||||
|
||||
async def _annotate_equivalents(pool, out: list[dict]) -> None:
|
||||
"""Attach an `equivalents` list to each row (#84.2) — parallel-authority links.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user