feat(halachot): canonical lookup-before-insert + MCP tools (Phase 3+4) #299
@@ -205,6 +205,11 @@ HALACHA_CONSOLIDATE_ENABLED = os.environ.get("HALACHA_CONSOLIDATE_ENABLED", "tru
|
|||||||
HALACHA_CONSOLIDATE_MODEL = os.environ.get("HALACHA_CONSOLIDATE_MODEL", HALACHA_EXTRACT_MODEL)
|
HALACHA_CONSOLIDATE_MODEL = os.environ.get("HALACHA_CONSOLIDATE_MODEL", HALACHA_EXTRACT_MODEL)
|
||||||
HALACHA_CONSOLIDATE_EFFORT = os.environ.get("HALACHA_CONSOLIDATE_EFFORT", "high")
|
HALACHA_CONSOLIDATE_EFFORT = os.environ.get("HALACHA_CONSOLIDATE_EFFORT", "high")
|
||||||
|
|
||||||
|
# V41 canonical lookup-before-insert: cosine gate for reusing an existing canonical
|
||||||
|
# instead of creating a new one. 0.85 is tuned to the embedding space (1024-dim voyage).
|
||||||
|
HALACHA_CANONICAL_LOOKUP_ENABLED = os.environ.get("HALACHA_CANONICAL_LOOKUP_ENABLED", "true").lower() == "true"
|
||||||
|
HALACHA_CANONICAL_THRESHOLD = float(os.environ.get("HALACHA_CANONICAL_THRESHOLD", "0.85"))
|
||||||
|
|
||||||
# Google Cloud Vision (OCR for scanned PDFs)
|
# Google Cloud Vision (OCR for scanned PDFs)
|
||||||
GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")
|
GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")
|
||||||
|
|
||||||
|
|||||||
@@ -427,18 +427,42 @@ async def halacha_review(
|
|||||||
reasoning_summary: str = "",
|
reasoning_summary: str = "",
|
||||||
subject_tags: list[str] | None = None,
|
subject_tags: list[str] | None = None,
|
||||||
practice_areas: list[str] | None = None,
|
practice_areas: list[str] | None = None,
|
||||||
|
canonical_statement: str = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
"""אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. status: pending_review / approved / rejected / published."""
|
"""אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. status: pending_review / approved / rejected / published.
|
||||||
|
canonical_statement: עריכת ניסוח העיקרון הקנוני הרחב (V41)."""
|
||||||
return await plib.halacha_review(
|
return await plib.halacha_review(
|
||||||
halacha_id, status, reviewer, rule_statement, reasoning_summary,
|
halacha_id, status, reviewer, rule_statement, reasoning_summary,
|
||||||
subject_tags, practice_areas,
|
subject_tags, practice_areas, canonical_statement,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def halachot_pending(limit: int = 100) -> str:
|
async def halachot_pending(
|
||||||
"""תור ההלכות הממתינות לאישור."""
|
limit: int = 100,
|
||||||
return await plib.halachot_pending(_clamp_limit(limit))
|
include_low_quality: bool = False,
|
||||||
|
instance_type: str = "original",
|
||||||
|
) -> str:
|
||||||
|
"""תור ההלכות הממתינות לאישור. V41: ברירת-מחדל instance_type='original' (עקרונות חדשים בלבד, לא ציטוטים)."""
|
||||||
|
return await plib.halachot_pending(_clamp_limit(limit), include_low_quality, instance_type)
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def canonical_halacha_list(
    practice_area: str = "",
    review_status: str = "",
    limit: int = 50,
    offset: int = 0,
) -> str:
    """List canonical principles (canonical_halachot). V41.

    practice_area: filter by practice area.
    review_status: pending_synthesis / pending_review / approved / published.
    """
    # Pure pass-through: the implementation lives in plib.canonical_halacha_list.
    response = await plib.canonical_halacha_list(
        practice_area=practice_area,
        review_status=review_status,
        limit=limit,
        offset=offset,
    )
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def canonical_halacha_get(canonical_id: str) -> str:
    """Fetch one canonical principle together with all of its instances, per ruling. V41."""
    # Pure pass-through: the implementation lives in plib.canonical_halacha_get.
    response = await plib.canonical_halacha_get(canonical_id=canonical_id)
    return response
|
||||||
|
|
||||||
|
|
||||||
# Documents
|
# Documents
|
||||||
|
|||||||
@@ -5320,6 +5320,24 @@ async def store_halachot_for_chunk(
|
|||||||
and halacha_quality.FLAG_NEAR_DUPLICATE not in flags):
|
and halacha_quality.FLAG_NEAR_DUPLICATE not in flags):
|
||||||
flags.append(halacha_quality.FLAG_NEAR_DUPLICATE)
|
flags.append(halacha_quality.FLAG_NEAR_DUPLICATE)
|
||||||
|
|
||||||
|
# 3) V41 lookup-before-insert: does this principle already have a canonical?
|
||||||
|
# If yes → 'citation' instance linked to the existing canonical.
|
||||||
|
# If no → 'original' instance; a new canonical is created after INSERT.
|
||||||
|
canonical_id = None
|
||||||
|
instance_type = "original"
|
||||||
|
if emb is not None and config.HALACHA_CANONICAL_LOOKUP_ENABLED:
|
||||||
|
canon_match = await conn.fetchrow(
|
||||||
|
"SELECT id, 1 - (embedding <=> $1) AS sim "
|
||||||
|
"FROM canonical_halachot "
|
||||||
|
"WHERE embedding IS NOT NULL "
|
||||||
|
"ORDER BY embedding <=> $1 LIMIT 1",
|
||||||
|
emb,
|
||||||
|
)
|
||||||
|
if (canon_match
|
||||||
|
and float(canon_match["sim"]) >= config.HALACHA_CANONICAL_THRESHOLD):
|
||||||
|
canonical_id = canon_match["id"]
|
||||||
|
instance_type = "citation"
|
||||||
|
|
||||||
confidence = float(h.get("confidence", 0.0))
|
confidence = float(h.get("confidence", 0.0))
|
||||||
auto_approve = confidence >= threshold and not flags
|
auto_approve = confidence >= threshold and not flags
|
||||||
review_status = "approved" if auto_approve else "pending_review"
|
review_status = "approved" if auto_approve else "pending_review"
|
||||||
@@ -5334,18 +5352,50 @@ async def store_halachot_for_chunk(
|
|||||||
reasoning_summary, supporting_quote, page_reference,
|
reasoning_summary, supporting_quote, page_reference,
|
||||||
practice_areas, subject_tags, cites, confidence,
|
practice_areas, subject_tags, cites, confidence,
|
||||||
quote_verified, quality_flags, embedding, review_status,
|
quote_verified, quality_flags, embedding, review_status,
|
||||||
reviewer, reviewed_at)
|
reviewer, reviewed_at, canonical_id, instance_type)
|
||||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
|
||||||
$12, $13, $14, $15, $16, {reviewed_at_clause})""",
|
$12, $13, $14, $15, $16, {reviewed_at_clause},
|
||||||
|
$17, $18)""",
|
||||||
case_law_id, base + inserted, h["rule_statement"],
|
case_law_id, base + inserted, h["rule_statement"],
|
||||||
h.get("rule_type", "interpretive"), h.get("reasoning_summary", ""),
|
h.get("rule_type", "interpretive"), h.get("reasoning_summary", ""),
|
||||||
h["supporting_quote"], h.get("page_reference", ""),
|
h["supporting_quote"], h.get("page_reference", ""),
|
||||||
h.get("practice_areas", []), h.get("subject_tags", []),
|
h.get("practice_areas", []), h.get("subject_tags", []),
|
||||||
h.get("cites", []), confidence, h.get("quote_verified", False),
|
h.get("cites", []), confidence, h.get("quote_verified", False),
|
||||||
flags, h.get("embedding"), review_status, reviewer,
|
flags, emb, review_status, reviewer,
|
||||||
|
canonical_id, instance_type,
|
||||||
)
|
)
|
||||||
existing_quotes.add(norm_quote)
|
existing_quotes.add(norm_quote)
|
||||||
inserted += 1
|
inserted += 1
|
||||||
|
|
||||||
|
# V41: maintain canonical_halachot after successful insert.
|
||||||
|
if config.HALACHA_CANONICAL_LOOKUP_ENABLED:
|
||||||
|
if instance_type == "original":
|
||||||
|
# New principle — create canonical and link back.
|
||||||
|
new_canon_id = await conn.fetchval(
|
||||||
|
"INSERT INTO canonical_halachot "
|
||||||
|
"(canonical_statement, rule_type, practice_areas, subject_tags, "
|
||||||
|
" embedding, first_established_in, review_status, instance_count) "
|
||||||
|
"VALUES ($1,$2,$3,$4,$5,$6,'pending_synthesis',1) RETURNING id",
|
||||||
|
h.get("rule_statement") or "",
|
||||||
|
h.get("rule_type", "interpretive"),
|
||||||
|
h.get("practice_areas") or [],
|
||||||
|
h.get("subject_tags") or [],
|
||||||
|
emb,
|
||||||
|
case_law_id,
|
||||||
|
)
|
||||||
|
await conn.execute(
|
||||||
|
"UPDATE halachot SET canonical_id=$1 "
|
||||||
|
"WHERE case_law_id=$2 AND halacha_index=$3",
|
||||||
|
new_canon_id, case_law_id, base + inserted - 1,
|
||||||
|
)
|
||||||
|
elif canonical_id is not None:
|
||||||
|
# Citation of existing canonical — bump its instance count.
|
||||||
|
await conn.execute(
|
||||||
|
"UPDATE canonical_halachot SET "
|
||||||
|
"instance_count = instance_count + 1, updated_at = now() "
|
||||||
|
"WHERE id = $1",
|
||||||
|
canonical_id,
|
||||||
|
)
|
||||||
await conn.execute(
|
await conn.execute(
|
||||||
"UPDATE precedent_chunks SET halacha_extracted_at = now() "
|
"UPDATE precedent_chunks SET halacha_extracted_at = now() "
|
||||||
"WHERE id = $1", chunk_id,
|
"WHERE id = $1", chunk_id,
|
||||||
@@ -5362,6 +5412,7 @@ async def list_halachot(
|
|||||||
case_law_id: UUID | None = None,
|
case_law_id: UUID | None = None,
|
||||||
review_status: str | None = None,
|
review_status: str | None = None,
|
||||||
practice_area: str | None = None,
|
practice_area: str | None = None,
|
||||||
|
instance_type: str | None = None,
|
||||||
limit: int = 200,
|
limit: int = 200,
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
exclude_low_quality: bool = False,
|
exclude_low_quality: bool = False,
|
||||||
@@ -5407,6 +5458,10 @@ async def list_halachot(
|
|||||||
conditions.append(f"${idx} = ANY(h.practice_areas)")
|
conditions.append(f"${idx} = ANY(h.practice_areas)")
|
||||||
params.append(practice_area)
|
params.append(practice_area)
|
||||||
idx += 1
|
idx += 1
|
||||||
|
if instance_type:
|
||||||
|
conditions.append(f"h.instance_type = ${idx}")
|
||||||
|
params.append(instance_type)
|
||||||
|
idx += 1
|
||||||
if exclude_low_quality:
|
if exclude_low_quality:
|
||||||
# a clean item has an empty/NULL quality_flags array
|
# a clean item has an empty/NULL quality_flags array
|
||||||
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
|
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
|
||||||
@@ -6034,6 +6089,51 @@ async def get_canonical_halacha(canonical_id: "UUID") -> "dict | None":
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def list_canonical_halachot(
    practice_area: str | None = None,
    review_status: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> list[dict]:
    """List canonical principles, optionally filtered by practice_area / review_status.

    Args:
        practice_area: when truthy, keep only rows whose ``practice_areas``
            array contains this value.
        review_status: when truthy, keep only rows with this ``review_status``.
        limit: maximum number of rows; negative values are treated as 0.
        offset: number of rows to skip; negative values are treated as 0.

    Returns:
        One dict per row with the keys id (as text), canonical_statement,
        rule_type, practice_areas, subject_tags, review_status,
        instance_count, created_at and updated_at, ordered by
        instance_count (descending), then created_at (descending), then id.
    """
    pool = await get_pool()

    # Build the WHERE clause with positional placeholders; each placeholder
    # number is simply the position of the value just appended to `params`.
    conditions: list[str] = []
    params: list = []
    if practice_area:
        params.append(practice_area)
        conditions.append(f"${len(params)} = ANY(practice_areas)")
    if review_status:
        params.append(review_status)
        conditions.append(f"review_status = ${len(params)}")
    where_clause = f"WHERE {' AND '.join(conditions)} " if conditions else ""

    # PostgreSQL rejects negative LIMIT / OFFSET values, so clamp them here
    # instead of surfacing a database error to the caller.
    params.append(max(limit, 0))
    limit_pos = len(params)
    params.append(max(offset, 0))
    offset_pos = len(params)

    rows = await pool.fetch(
        "SELECT id::text, canonical_statement, rule_type, practice_areas, "
        "       subject_tags, review_status, instance_count, created_at, updated_at "
        "FROM canonical_halachot "
        f"{where_clause}"
        # The table-qualified id is a unique tie-breaker: without it, rows that
        # share instance_count and created_at may be returned in a different
        # order on each call, making OFFSET pagination repeat or skip rows.
        "ORDER BY instance_count DESC, created_at DESC, canonical_halachot.id "
        f"LIMIT ${limit_pos} OFFSET ${offset_pos}",
        *params,
    )
    return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
async def update_canonical_statement(
    canonical_id: "UUID", canonical_statement: str,
) -> bool:
    """Overwrite the statement text of one canonical principle.

    Also refreshes the row's ``updated_at``. Returns True when the command
    status reports a non-zero row count, i.e. a row with that id was found.
    """
    pool = await get_pool()
    sql = (
        "UPDATE canonical_halachot SET canonical_statement=$2, updated_at=now() "
        "WHERE id=$1"
    )
    status = await pool.execute(sql, canonical_id, canonical_statement)
    # The last whitespace-separated token of the status string is the row count.
    affected = status.rsplit(None, 1)[-1]
    return affected != "0"
|
||||||
|
|
||||||
|
|
||||||
async def _annotate_equivalents(pool, out: list[dict]) -> None:
|
async def _annotate_equivalents(pool, out: list[dict]) -> None:
|
||||||
"""Attach an `equivalents` list to each row (#84.2) — parallel-authority links.
|
"""Attach an `equivalents` list to each row (#84.2) — parallel-authority links.
|
||||||
|
|
||||||
|
|||||||
@@ -320,6 +320,7 @@ async def halacha_review(
|
|||||||
reasoning_summary: str = "",
|
reasoning_summary: str = "",
|
||||||
subject_tags: list[str] | None = None,
|
subject_tags: list[str] | None = None,
|
||||||
practice_areas: list[str] | None = None,
|
practice_areas: list[str] | None = None,
|
||||||
|
canonical_statement: str = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
"""אישור / דחייה / עריכה של הלכה שחולצה אוטומטית.
|
"""אישור / דחייה / עריכה של הלכה שחולצה אוטומטית.
|
||||||
|
|
||||||
@@ -331,6 +332,7 @@ async def halacha_review(
|
|||||||
reasoning_summary: עריכת תמצית ההיגיון (ריק = ללא שינוי).
|
reasoning_summary: עריכת תמצית ההיגיון (ריק = ללא שינוי).
|
||||||
subject_tags: עריכת תגיות (None = ללא שינוי).
|
subject_tags: עריכת תגיות (None = ללא שינוי).
|
||||||
practice_areas: עריכת תחומים (None = ללא שינוי).
|
practice_areas: עריכת תחומים (None = ללא שינוי).
|
||||||
|
canonical_statement: עריכת הניסוח הקנוני הרחב של העיקרון (ריק = ללא שינוי).
|
||||||
"""
|
"""
|
||||||
if status not in {"pending_review", "approved", "rejected", "published"}:
|
if status not in {"pending_review", "approved", "rejected", "published"}:
|
||||||
return _err(
|
return _err(
|
||||||
@@ -353,25 +355,87 @@ async def halacha_review(
|
|||||||
)
|
)
|
||||||
if row is None:
|
if row is None:
|
||||||
return _err("הלכה לא נמצאה")
|
return _err("הלכה לא נמצאה")
|
||||||
|
|
||||||
|
# V41: propagate canonical_statement edit to the canonical principle.
|
||||||
|
if canonical_statement and row.get("canonical_id"):
|
||||||
|
try:
|
||||||
|
await db.update_canonical_statement(
|
||||||
|
UUID(str(row["canonical_id"])), canonical_statement,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
import logging
|
||||||
|
logging.getLogger(__name__).warning(
|
||||||
|
"halacha_review: failed to update canonical_statement: %s", e,
|
||||||
|
)
|
||||||
|
|
||||||
return _ok(row)
|
return _ok(row)
|
||||||
|
|
||||||
|
|
||||||
async def halachot_pending(limit: int = 100, include_low_quality: bool = False) -> str:
|
async def halachot_pending(
|
||||||
|
limit: int = 100,
|
||||||
|
include_low_quality: bool = False,
|
||||||
|
instance_type: str = "original",
|
||||||
|
) -> str:
|
||||||
"""תור ההלכות הממתינות לאישור (review_status='pending_review').
|
"""תור ההלכות הממתינות לאישור (review_status='pending_review').
|
||||||
|
|
||||||
כברירת-מחדל (#84.1, #84.3) התור **מסונן** — הלכות עם דגל-איכות כלשהו
|
כברירת-מחדל (#84.1, #84.3, V41) התור **מסונן** — הלכות עם דגל-איכות כלשהו
|
||||||
(application / ציטוט-לא-מאומת / קטוע / obiter / restatement דק / לא-נתמך /
|
(application / ציטוט-לא-מאומת / קטוע / obiter / restatement דק / לא-נתמך /
|
||||||
near-duplicate) מוסתרות (הן שייכות ל'דורש תיקון-חילוץ', לא לתור-האישור),
|
near-duplicate) מוסתרות (הן שייכות ל'דורש תיקון-חילוץ', לא לתור-האישור),
|
||||||
ו**ממוין לפי עדיפות** (טופלו-לרעה תחילה, אז הכי לא-ודאיים, אז הישנים).
|
ו**ממוין לפי עדיפות** (טופלו-לרעה תחילה, אז הכי לא-ודאיים, אז הישנים).
|
||||||
|
|
||||||
|
V41: כברירת-מחדל מציג רק instance_type='original' (עקרונות חדשים, לא ציטוטים).
|
||||||
|
העברת instance_type='' מציגה הכל (כולל ציטוטים).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
limit: מספר מקסימלי.
|
limit: מספר מקסימלי.
|
||||||
include_low_quality: True כדי לחשוף גם פריטים מסומני-איכות (בקט 'דורש תיקון').
|
include_low_quality: True כדי לחשוף גם פריטים מסומני-איכות (בקט 'דורש תיקון').
|
||||||
|
instance_type: 'original' (ברירת מחדל) / 'citation' / 'application' / '' (הכל).
|
||||||
"""
|
"""
|
||||||
rows = await db.list_halachot(
|
rows = await db.list_halachot(
|
||||||
review_status="pending_review",
|
review_status="pending_review",
|
||||||
|
instance_type=instance_type or None,
|
||||||
limit=limit,
|
limit=limit,
|
||||||
exclude_low_quality=not include_low_quality,
|
exclude_low_quality=not include_low_quality,
|
||||||
order_by_priority=True,
|
order_by_priority=True,
|
||||||
)
|
)
|
||||||
return _ok(rows)
|
return _ok(rows)
|
||||||
|
|
||||||
|
|
||||||
|
async def canonical_halacha_list(
    practice_area: str = "",
    review_status: str = "",
    limit: int = 50,
    offset: int = 0,
) -> str:
    """List canonical principles (canonical_halachot) — a convenience query for the writing agents.

    Args:
        practice_area: filter by practice area (empty = all).
        review_status: pending_synthesis / pending_review / approved / published (empty = all).
        limit: maximum number of rows (clamped to the range 0..200).
        offset: number of rows to skip for pagination (negative values are treated as 0).
    """
    # Clamp on both sides: a negative limit or offset would otherwise reach
    # the SQL LIMIT / OFFSET clause and fail as a database error.
    bounded_limit = max(0, min(limit, 200))
    bounded_offset = max(0, offset)
    rows = await db.list_canonical_halachot(
        # Empty strings mean "no filter", which the DB layer expresses as None.
        practice_area=practice_area or None,
        review_status=review_status or None,
        limit=bounded_limit,
        offset=bounded_offset,
    )
    return _ok(rows)
|
||||||
|
|
||||||
|
|
||||||
|
async def canonical_halacha_get(canonical_id: str) -> str:
    """Fetch one canonical principle (canonical_statement, practice_areas, subject_tags,
    review_status, instance_count) plus all of its instances (per ruling).

    Args:
        canonical_id: identifier (UUID) of the canonical principle.

    Returns:
        The `_ok` payload for the row, or an `_err` payload when the id is
        malformed or no such principle exists.
    """
    try:
        # Tolerate surrounding whitespace in ids pasted by a caller.
        cid = UUID(canonical_id.strip())
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed hex string. AttributeError / TypeError: a
        # non-string value (e.g. None or an int) was passed; report it as an
        # invalid id instead of letting the exception escape the tool.
        return _err("canonical_id לא תקין")
    row = await db.get_canonical_halacha(cid)
    if row is None:
        return _err("עיקרון קנוני לא נמצא")
    return _ok(row)
|
||||||
|
|||||||
Reference in New Issue
Block a user