diff --git a/mcp-server/src/legal_mcp/config.py b/mcp-server/src/legal_mcp/config.py index 94d5014..d364a9f 100644 --- a/mcp-server/src/legal_mcp/config.py +++ b/mcp-server/src/legal_mcp/config.py @@ -205,6 +205,11 @@ HALACHA_CONSOLIDATE_ENABLED = os.environ.get("HALACHA_CONSOLIDATE_ENABLED", "tru HALACHA_CONSOLIDATE_MODEL = os.environ.get("HALACHA_CONSOLIDATE_MODEL", HALACHA_EXTRACT_MODEL) HALACHA_CONSOLIDATE_EFFORT = os.environ.get("HALACHA_CONSOLIDATE_EFFORT", "high") +# V41 canonical lookup-before-insert: cosine gate for reusing an existing canonical +# instead of creating a new one. 0.85 is tuned to the embedding space (1024-dim voyage). +HALACHA_CANONICAL_LOOKUP_ENABLED = os.environ.get("HALACHA_CANONICAL_LOOKUP_ENABLED", "true").lower() == "true" +HALACHA_CANONICAL_THRESHOLD = float(os.environ.get("HALACHA_CANONICAL_THRESHOLD", "0.85")) + # Google Cloud Vision (OCR for scanned PDFs) GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "") diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index f3ed017..ddf04d6 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -427,18 +427,42 @@ async def halacha_review( reasoning_summary: str = "", subject_tags: list[str] | None = None, practice_areas: list[str] | None = None, + canonical_statement: str = "", ) -> str: - """אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. status: pending_review / approved / rejected / published.""" + """אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. status: pending_review / approved / rejected / published. + canonical_statement: עריכת ניסוח העיקרון הקנוני הרחב (V41).""" return await plib.halacha_review( halacha_id, status, reviewer, rule_statement, reasoning_summary, - subject_tags, practice_areas, + subject_tags, practice_areas, canonical_statement, ) @mcp.tool() -async def halachot_pending(limit: int = 100) -> str: - """תור ההלכות הממתינות לאישור.""" - return await plib.halachot_pending(_clamp_limit(limit)) +async def halachot_pending( + limit: int = 100, + include_low_quality: bool = False, + instance_type: str = "original", +) -> str: + """תור ההלכות הממתינות לאישור. V41: ברירת-מחדל instance_type='original' (עקרונות חדשים בלבד, לא ציטוטים).""" + return await plib.halachot_pending(_clamp_limit(limit), include_low_quality, instance_type) + + +@mcp.tool() +async def canonical_halacha_list( + practice_area: str = "", + review_status: str = "", + limit: int = 50, + offset: int = 0, +) -> str: + """רשימת עקרונות קנוניים (canonical_halachot). V41. + practice_area: סינון תחום עיסוק. review_status: pending_synthesis/pending_review/approved/published.""" + return await plib.canonical_halacha_list(practice_area, review_status, limit, offset) + + +@mcp.tool() +async def canonical_halacha_get(canonical_id: str) -> str: + """שלוף עיקרון קנוני + כל האינסטנסים שלו לפי פסיקה. V41.""" + return await plib.canonical_halacha_get(canonical_id) # Documents diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 227e02e..4543fe4 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -5320,6 +5320,24 @@ async def store_halachot_for_chunk( and halacha_quality.FLAG_NEAR_DUPLICATE not in flags): flags.append(halacha_quality.FLAG_NEAR_DUPLICATE) + # 3) V41 lookup-before-insert: does this principle already have a canonical? + # If yes → 'citation' instance linked to the existing canonical. + # If no → 'original' instance; a new canonical is created after INSERT. + canonical_id = None + instance_type = "original" + if emb is not None and config.HALACHA_CANONICAL_LOOKUP_ENABLED: + canon_match = await conn.fetchrow( + "SELECT id, 1 - (embedding <=> $1) AS sim " + "FROM canonical_halachot " + "WHERE embedding IS NOT NULL " + "ORDER BY embedding <=> $1 LIMIT 1", + emb, + ) + if (canon_match + and float(canon_match["sim"]) >= config.HALACHA_CANONICAL_THRESHOLD): + canonical_id = canon_match["id"] + instance_type = "citation" + confidence = float(h.get("confidence", 0.0)) auto_approve = confidence >= threshold and not flags review_status = "approved" if auto_approve else "pending_review" @@ -5334,18 +5352,50 @@ async def store_halachot_for_chunk( reasoning_summary, supporting_quote, page_reference, practice_areas, subject_tags, cites, confidence, quote_verified, quality_flags, embedding, review_status, - reviewer, reviewed_at) + reviewer, reviewed_at, canonical_id, instance_type) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, - $12, $13, $14, $15, $16, {reviewed_at_clause})""", + $12, $13, $14, $15, $16, {reviewed_at_clause}, + $17, $18)""", case_law_id, base + inserted, h["rule_statement"], h.get("rule_type", "interpretive"), h.get("reasoning_summary", ""), h["supporting_quote"], h.get("page_reference", ""), h.get("practice_areas", []), h.get("subject_tags", []), h.get("cites", []), confidence, h.get("quote_verified", False), - flags, h.get("embedding"), review_status, reviewer, + flags, emb, review_status, reviewer, + canonical_id, instance_type, ) existing_quotes.add(norm_quote) inserted += 1 + + # V41: maintain canonical_halachot after successful insert. + if config.HALACHA_CANONICAL_LOOKUP_ENABLED: + if instance_type == "original": + # New principle — create canonical and link back. + new_canon_id = await conn.fetchval( + "INSERT INTO canonical_halachot " + "(canonical_statement, rule_type, practice_areas, subject_tags, " + " embedding, first_established_in, review_status, instance_count) " + "VALUES ($1,$2,$3,$4,$5,$6,'pending_synthesis',1) RETURNING id", + h.get("rule_statement") or "", + h.get("rule_type", "interpretive"), + h.get("practice_areas") or [], + h.get("subject_tags") or [], + emb, + case_law_id, + ) + await conn.execute( + "UPDATE halachot SET canonical_id=$1 " + "WHERE case_law_id=$2 AND halacha_index=$3", + new_canon_id, case_law_id, base + inserted - 1, + ) + elif canonical_id is not None: + # Citation of existing canonical — bump its instance count. + await conn.execute( + "UPDATE canonical_halachot SET " + "instance_count = instance_count + 1, updated_at = now() " + "WHERE id = $1", + canonical_id, + ) await conn.execute( "UPDATE precedent_chunks SET halacha_extracted_at = now() " "WHERE id = $1", chunk_id, @@ -5362,6 +5412,7 @@ async def list_halachot( case_law_id: UUID | None = None, review_status: str | None = None, practice_area: str | None = None, + instance_type: str | None = None, limit: int = 200, offset: int = 0, exclude_low_quality: bool = False, @@ -5407,6 +5458,10 @@ async def list_halachot( conditions.append(f"${idx} = ANY(h.practice_areas)") params.append(practice_area) idx += 1 + if instance_type: + conditions.append(f"h.instance_type = ${idx}") + params.append(instance_type) + idx += 1 if exclude_low_quality: # a clean item has an empty/NULL quality_flags array conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0") @@ -6034,6 +6089,51 @@ async def get_canonical_halacha(canonical_id: "UUID") -> "dict | None": } +async def list_canonical_halachot( + practice_area: str | None = None, + review_status: str | None = None, + limit: int = 50, + offset: int = 0, +) -> list[dict]: + """List canonical principles, optionally filtered by practice_area / review_status.""" + pool = await get_pool() + conditions = ["1=1"] + params: list = [] + idx = 1 + if practice_area: + conditions.append(f"${ idx} = ANY(practice_areas)") + params.append(practice_area) + idx += 1 + if review_status: + conditions.append(f"review_status = ${idx}") + params.append(review_status) + idx += 1 + params += [limit, offset] + rows = await pool.fetch( + f"SELECT id::text, canonical_statement, rule_type, practice_areas, " + f" subject_tags, review_status, instance_count, created_at, updated_at " + f"FROM canonical_halachot " + f"WHERE {' AND '.join(conditions)} " + f"ORDER BY instance_count DESC, created_at DESC " + f"LIMIT ${idx} OFFSET ${idx + 1}", + *params, + ) + return [dict(r) for r in rows] + + +async def update_canonical_statement( + canonical_id: "UUID", canonical_statement: str, +) -> bool: + """Update the synthesized statement of a canonical principle. Returns True if found.""" + pool = await get_pool() + result = await pool.execute( + "UPDATE canonical_halachot SET canonical_statement=$2, updated_at=now() " + "WHERE id=$1", + canonical_id, canonical_statement, + ) + return result.split()[-1] != "0" + + async def _annotate_equivalents(pool, out: list[dict]) -> None: """Attach an `equivalents` list to each row (#84.2) — parallel-authority links. diff --git a/mcp-server/src/legal_mcp/tools/precedent_library.py b/mcp-server/src/legal_mcp/tools/precedent_library.py index 91fcb6e..8ac3d2f 100644 --- a/mcp-server/src/legal_mcp/tools/precedent_library.py +++ b/mcp-server/src/legal_mcp/tools/precedent_library.py @@ -320,6 +320,7 @@ async def halacha_review( reasoning_summary: str = "", subject_tags: list[str] | None = None, practice_areas: list[str] | None = None, + canonical_statement: str = "", ) -> str: """אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. @@ -331,6 +332,7 @@ async def halacha_review( reasoning_summary: עריכת תמצית ההיגיון (ריק = ללא שינוי). subject_tags: עריכת תגיות (None = ללא שינוי). practice_areas: עריכת תחומים (None = ללא שינוי). + canonical_statement: עריכת הניסוח הקנוני הרחב של העיקרון (ריק = ללא שינוי). """ if status not in {"pending_review", "approved", "rejected", "published"}: return _err( @@ -353,25 +355,87 @@ async def halacha_review( ) if row is None: return _err("הלכה לא נמצאה") + + # V41: propagate canonical_statement edit to the canonical principle. + if canonical_statement and row.get("canonical_id"): + try: + await db.update_canonical_statement( + UUID(str(row["canonical_id"])), canonical_statement, + ) + except Exception as e: + import logging + logging.getLogger(__name__).warning( + "halacha_review: failed to update canonical_statement: %s", e, + ) + return _ok(row) -async def halachot_pending(limit: int = 100, include_low_quality: bool = False) -> str: +async def halachot_pending( + limit: int = 100, + include_low_quality: bool = False, + instance_type: str = "original", +) -> str: """תור ההלכות הממתינות לאישור (review_status='pending_review'). - כברירת-מחדל (#84.1, #84.3) התור **מסונן** — הלכות עם דגל-איכות כלשהו + כברירת-מחדל (#84.1, #84.3, V41) התור **מסונן** — הלכות עם דגל-איכות כלשהו (application / ציטוט-לא-מאומת / קטוע / obiter / restatement דק / לא-נתמך / near-duplicate) מוסתרות (הן שייכות ל'דורש תיקון-חילוץ', לא לתור-האישור), ו**ממוין לפי עדיפות** (טופלו-לרעה תחילה, אז הכי לא-ודאיים, אז הישנים). + V41: כברירת-מחדל מציג רק instance_type='original' (עקרונות חדשים, לא ציטוטים). + העברת instance_type='' מציגה הכל (כולל ציטוטים). + Args: limit: מספר מקסימלי. include_low_quality: True כדי לחשוף גם פריטים מסומני-איכות (בקט 'דורש תיקון'). + instance_type: 'original' (ברירת מחדל) / 'citation' / 'application' / '' (הכל). """ rows = await db.list_halachot( review_status="pending_review", + instance_type=instance_type or None, limit=limit, exclude_low_quality=not include_low_quality, order_by_priority=True, ) return _ok(rows) + + +async def canonical_halacha_list( + practice_area: str = "", + review_status: str = "", + limit: int = 50, + offset: int = 0, +) -> str: + """רשימת עקרונות קנוניים (canonical_halachot) — שאילתת נוחות לסוכני-הכתיבה. + + Args: + practice_area: סינון לפי תחום עיסוק (ריק = הכל). + review_status: pending_synthesis / pending_review / approved / published (ריק = הכל). + limit: מספר מקסימלי (עד 200). + offset: עמוד (pagination). + """ + rows = await db.list_canonical_halachot( + practice_area=practice_area or None, + review_status=review_status or None, + limit=min(limit, 200), + offset=offset, + ) + return _ok(rows) + + +async def canonical_halacha_get(canonical_id: str) -> str: + """שלוף עיקרון קנוני אחד (canonical_statement, practice_areas, subject_tags, + review_status, instance_count) + כל האינסטנסים שלו (לפי פסיקה). + + Args: + canonical_id: מזהה (UUID) של העיקרון הקנוני. + """ + try: + cid = UUID(canonical_id) + except ValueError: + return _err("canonical_id לא תקין") + row = await db.get_canonical_halacha(cid) + if row is None: + return _err("עיקרון קנוני לא נמצא") + return _ok(row)