feat: external precedent library with auto halacha extraction
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m27s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m27s
Adds a third corpus of legal authority distinct from style_corpus (Daphna's prior decisions for voice) and case_precedents (chair-attached quotes per case). The new corpus holds chair-uploaded court rulings and other appeals committee decisions, with binding rules (הלכות) extracted automatically and queued for chair approval. Pipeline (web/app.py + services/precedent_library.py): file → extract → chunk → Voyage embed → halacha_extractor → store + publish progress over the existing Redis SSE channel. Schema V7 (services/db.py): extends case_law with source_kind + extraction status fields under a CHECK constraint pinning practice_area to the three appeals committee domains (rishuy_uvniya, betterment_levy, compensation_197). New precedent_chunks (vector(1024)) and halachot tables (vector(1024) over rule_statement, IVFFlat indexes, gin on practice_areas/subject_tags). Halachot start as pending_review; only approved/published rows are visible to search_precedent_library. Agents: legal-writer, legal-researcher, legal-analyst, legal-ceo, legal-qa get search_precedent_library. legal-writer prompt explains the three-corpus distinction and CREAC use; legal-qa now verifies that every cited halacha resolves to an approved row in the corpus. UI: /precedents page with four tabs — library / semantic search / pending review (J/K nav, A/R/E shortcuts, badge count) / stats. Reuses the existing upload-sheet progress + SSE pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -47,6 +47,7 @@ mcp = FastMCP(
|
||||
|
||||
from legal_mcp.tools import ( # noqa: E402
|
||||
cases, documents, search, drafting, workflow, precedents,
|
||||
precedent_library as plib,
|
||||
)
|
||||
|
||||
|
||||
@@ -142,10 +143,114 @@ async def precedent_remove(precedent_id: str) -> str:
|
||||
async def precedent_search_library(
    query: str, practice_area: str = "", limit: int = 10,
) -> str:
    """חיפוש בציטוטים שדפנה צירפה ידנית לתיקים בעבר (case_precedents).
    שונה מ-search_precedent_library שמחפש בקורפוס הפסיקה הסמכותית."""
    # English gloss of the docstring (kept in Hebrew because it is presumably
    # published as the tool description — TODO confirm): "Search the quotes
    # Daphna manually attached to past cases (case_precedents). Different from
    # search_precedent_library, which searches the authoritative case-law
    # corpus."
    #
    # Only the first string literal of a function body is its docstring. The
    # older, generic description used to sit in front of this one, which made
    # the newer text a dead expression; it is now the single docstring.
    return await precedents.precedent_search_library(query, practice_area, limit)
|
||||
|
||||
|
||||
# ── External Precedent Library — authoritative case-law corpus ─────
|
||||
# Distinct from precedent_search_library above (chair-attached quotes)
|
||||
# and from search_decisions (Daphna's style corpus).
|
||||
|
||||
|
||||
@mcp.tool()
async def precedent_library_upload(
    file_path: str, citation: str,
    case_name: str = "", court: str = "", decision_date: str = "",
    source_type: str = "", precedent_level: str = "",
    practice_area: str = "", appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    is_binding: bool = True,
    headnote: str = "", summary: str = "",
) -> str:
    """העלאת פסיקה חיצונית (פס"ד / החלטה של ועדה אחרת) לקורפוס הסמכותי. מחלץ הלכות אוטומטית — כולן ממתינות לאישור היו"ר. practice_area: rishuy_uvniya / betterment_levy / compensation_197."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "Upload an external ruling (court judgment /
    # decision of another committee) to the authoritative corpus. Extracts
    # halachot automatically — all of them await the chair's approval.
    # practice_area: rishuy_uvniya / betterment_levy / compensation_197."
    #
    # Thin pass-through: every argument is forwarded positionally, in the
    # order the service function is called with in this module.
    forwarded = (
        file_path, citation, case_name, court, decision_date,
        source_type, precedent_level, practice_area, appeal_subtype,
        subject_tags, is_binding, headnote, summary,
    )
    return await plib.precedent_library_upload(*forwarded)
|
||||
|
||||
|
||||
@mcp.tool()
async def precedent_library_list(
    practice_area: str = "", court: str = "", precedent_level: str = "",
    source_type: str = "", search: str = "", limit: int = 100,
) -> str:
    """רשימת הפסיקה בקורפוס הסמכותי, עם פילטרים."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "List of the rulings in the authoritative corpus,
    # with filters."
    #
    # All filters are forwarded positionally, unchanged, to the service layer.
    listing = await plib.precedent_library_list(
        practice_area, court, precedent_level, source_type, search, limit,
    )
    return listing
|
||||
|
||||
|
||||
@mcp.tool()
async def precedent_library_get(case_law_id: str) -> str:
    """פסיקה ספציפית בקורפוס + רשימת ההלכות שחולצו ממנה (כולל ממתינות לאישור)."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "A specific ruling in the corpus + the list of
    # halachot extracted from it (including those pending approval)."
    details = await plib.precedent_library_get(case_law_id)
    return details
|
||||
|
||||
|
||||
@mcp.tool()
async def precedent_library_delete(case_law_id: str) -> str:
    """מחיקת פסיקה מהקורפוס (cascade: chunks + halachot)."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "Delete a ruling from the corpus
    # (cascade: chunks + halachot)."
    outcome = await plib.precedent_library_delete(case_law_id)
    return outcome
|
||||
|
||||
|
||||
@mcp.tool()
async def precedent_extract_halachot(case_law_id: str) -> str:
    """הרצה מחדש של חילוץ הלכות לפסיקה קיימת. ההלכות הקיימות נמחקות, החדשות חוזרות לסטטוס pending_review."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "Re-run halacha extraction for an existing
    # ruling. The existing halachot are deleted; the new ones go back to
    # pending_review status."
    outcome = await plib.precedent_extract_halachot(case_law_id)
    return outcome
|
||||
|
||||
|
||||
@mcp.tool()
async def search_precedent_library(
    query: str,
    practice_area: str = "", court: str = "", precedent_level: str = "",
    appeal_subtype: str = "", subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> str:
    """חיפוש סמנטי בקורפוס הפסיקה הסמכותית. מחזיר הלכות (מאושרות בלבד) + קטעי טקסט. השתמש כש-legal-writer צריך לצטט פסיקה מחייבת בבלוק י (CREAC: rule + explanation)."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "Semantic search over the authoritative case-law
    # corpus. Returns halachot (approved only) + text passages. Use when
    # legal-writer needs to cite binding case law in block י
    # (CREAC: rule + explanation)."
    #
    # The sixth positional argument is always None: this tool exposes no
    # parameter for it (presumably the is_binding filter — verify against
    # plib.search_precedent_library).
    forwarded = (
        query, practice_area, court, precedent_level, appeal_subtype,
        None, subject_tag, limit, include_halachot,
    )
    return await plib.search_precedent_library(*forwarded)
|
||||
|
||||
|
||||
@mcp.tool()
async def halacha_review(
    halacha_id: str, status: str,
    reviewer: str = "דפנה",
    rule_statement: str = "", reasoning_summary: str = "",
    subject_tags: list[str] | None = None,
    practice_areas: list[str] | None = None,
) -> str:
    """אישור / דחייה / עריכה של הלכה שחולצה אוטומטית. status: pending_review / approved / rejected / published."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "Approve / reject / edit an automatically
    # extracted halacha. status: pending_review / approved / rejected /
    # published."
    #
    # Arguments go to the service layer positionally and unmodified; how empty
    # strings and None are interpreted is decided there.
    forwarded = (
        halacha_id, status, reviewer, rule_statement, reasoning_summary,
        subject_tags, practice_areas,
    )
    return await plib.halacha_review(*forwarded)
|
||||
|
||||
|
||||
@mcp.tool()
async def halachot_pending(limit: int = 100) -> str:
    """תור ההלכות הממתינות לאישור."""
    # English gloss of the docstring (kept verbatim; presumably published as
    # the tool description): "Queue of the halachot awaiting approval."
    queue = await plib.halachot_pending(limit)
    return queue
|
||||
|
||||
|
||||
# Documents
|
||||
@mcp.tool()
|
||||
async def document_upload(
|
||||
|
||||
@@ -7,14 +7,16 @@ from dataclasses import dataclass, field
|
||||
|
||||
from legal_mcp import config
|
||||
|
||||
# Hebrew legal section headers.
# Covers both appeals committee decisions and external court rulings —
# court rulings use slightly different vocabulary: "פסק דין" (judgment),
# "נימוקים" (reasons), "סוף דבר" (final word / conclusion).
#
# Each entry is (regex alternation, section label). Every label appears
# exactly once; the earlier, narrower variants of the legal_analysis /
# conclusion / ruling patterns were strict subsets of the ones below and
# have been dropped.
SECTION_PATTERNS = [
    (r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע", "facts"),
    (r"טענות\s*העוררי[םן]|טענות\s*המערערי[םן]|עיקר\s*טענות\s*העוררי[םן]", "appellant_claims"),
    (r"טענות\s*המשיבי[םן]|תשובת\s*המשיבי[םן]|עיקר\s*טענות\s*המשיבי[םן]", "respondent_claims"),
    (r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|נימוקים", "legal_analysis"),
    (r"מסקנ[הות]|סיכום|סוף\s*דבר", "conclusion"),
    # "פסק דין" is matched with an ASCII hyphen, the Hebrew maqaf (U+05BE),
    # any run of whitespace, or nothing between the two words — the same
    # tolerance the other multi-word headers get from \s*.
    (r"פסק[-־\s]*דין|החלטה|לפיכך\s*אני\s*מחליט|התוצאה", "ruling"),
    (r"מבוא|פתיחה|לפניי", "intro"),
]
|
||||
|
||||
|
||||
@@ -518,6 +518,91 @@ CREATE INDEX IF NOT EXISTS idx_cases_archived ON cases(archived_at) WHERE archiv
|
||||
"""
|
||||
|
||||
|
||||
# ── V7: External Precedent Library + halacha extraction ──────────
|
||||
# Chair-uploaded external court rulings and other appeals committee decisions
|
||||
# become an authoritative law corpus. Distinct from style_corpus (Daphna's
|
||||
# style) and case_precedents (chair-attached quotes scoped to a single case).
|
||||
|
||||
SCHEMA_V7_SQL = """
|
||||
-- case_law extensions: distinguish chair-uploaded full rulings from
|
||||
-- auto-extracted citation stubs, and track ingestion progress.
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_kind TEXT DEFAULT 'cited_only';
|
||||
-- 'external_upload' (chair uploaded full ruling) | 'cited_only' (stub from
|
||||
-- references_extractor) | 'nevo_seed' (future: auto-fetched from Nevo).
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS document_id UUID REFERENCES documents(id) ON DELETE SET NULL;
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS extraction_status TEXT DEFAULT 'pending';
|
||||
-- 'pending' | 'processing' | 'completed' | 'failed'
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_status TEXT DEFAULT 'pending';
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT '';
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS headnote TEXT DEFAULT '';
|
||||
-- chair-editable abstract shown in search results.
|
||||
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS source_type TEXT DEFAULT '';
|
||||
-- 'court_ruling' | 'appeals_committee'
|
||||
|
||||
-- practice_area is closed to the three appeals committee domains.
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE case_law ADD CONSTRAINT case_law_practice_area_check
|
||||
CHECK (practice_area IN ('', 'rishuy_uvniya', 'betterment_levy', 'compensation_197'));
|
||||
EXCEPTION WHEN duplicate_object THEN NULL; END $$;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
|
||||
CREATE INDEX IF NOT EXISTS idx_case_law_practice ON case_law(practice_area, appeal_subtype);
|
||||
|
||||
-- precedent_chunks: full-text chunks of an uploaded ruling, with embeddings.
|
||||
-- Analog of document_chunks for case_law rows where source_kind='external_upload'.
|
||||
CREATE TABLE IF NOT EXISTS precedent_chunks (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
|
||||
chunk_index INTEGER NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
section_type TEXT DEFAULT 'other',
|
||||
-- intro | facts | legal_analysis | ruling | conclusion | other
|
||||
page_number INTEGER,
|
||||
embedding vector(1024),
|
||||
created_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_case_law ON precedent_chunks(case_law_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_section ON precedent_chunks(case_law_id, section_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_precedent_chunks_vec
|
||||
ON precedent_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
|
||||
|
||||
-- halachot: extracted binding rules. One halacha = one rule + verbatim quote.
|
||||
-- Embedded separately for rule-precision semantic match (chunks centroid is
|
||||
-- dominated by surrounding context). All halachot start as pending_review;
|
||||
-- only approved/published rows are visible to search_precedent_library.
|
||||
CREATE TABLE IF NOT EXISTS halachot (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
case_law_id UUID REFERENCES case_law(id) ON DELETE CASCADE,
|
||||
halacha_index INTEGER NOT NULL,
|
||||
rule_statement TEXT NOT NULL,
|
||||
rule_type TEXT DEFAULT 'binding',
|
||||
-- binding | interpretive | procedural | obiter
|
||||
reasoning_summary TEXT DEFAULT '',
|
||||
supporting_quote TEXT NOT NULL,
|
||||
page_reference TEXT DEFAULT '',
|
||||
practice_areas TEXT[] DEFAULT '{}',
|
||||
subject_tags TEXT[] DEFAULT '{}',
|
||||
cites TEXT[] DEFAULT '{}',
|
||||
confidence NUMERIC(3,2) DEFAULT 0.0,
|
||||
quote_verified BOOLEAN DEFAULT FALSE,
|
||||
review_status TEXT DEFAULT 'pending_review',
|
||||
-- pending_review | approved | rejected | published
|
||||
reviewer TEXT DEFAULT '',
|
||||
reviewed_at TIMESTAMPTZ,
|
||||
embedding vector(1024),
|
||||
created_at TIMESTAMPTZ DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_halachot_case_law ON halachot(case_law_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_halachot_status ON halachot(review_status);
|
||||
CREATE INDEX IF NOT EXISTS idx_halachot_practice ON halachot USING gin(practice_areas);
|
||||
CREATE INDEX IF NOT EXISTS idx_halachot_tags ON halachot USING gin(subject_tags);
|
||||
CREATE INDEX IF NOT EXISTS idx_halachot_vec
|
||||
ON halachot USING ivfflat (embedding vector_cosine_ops) WITH (lists = 50);
|
||||
"""
|
||||
|
||||
|
||||
async def init_schema() -> None:
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
@@ -528,7 +613,8 @@ async def init_schema() -> None:
|
||||
await conn.execute(SCHEMA_V4_SQL)
|
||||
await conn.execute(SCHEMA_V5_SQL)
|
||||
await conn.execute(SCHEMA_V6_SQL)
|
||||
logger.info("Database schema initialized (v1-v6)")
|
||||
await conn.execute(SCHEMA_V7_SQL)
|
||||
logger.info("Database schema initialized (v1-v7)")
|
||||
|
||||
|
||||
# ── Case CRUD ───────────────────────────────────────────────────────
|
||||
@@ -1518,3 +1604,590 @@ async def detect_appraiser_conflicts(case_id: UUID) -> list[dict]:
|
||||
"entries": entries,
|
||||
})
|
||||
return conflicts
|
||||
|
||||
|
||||
# ── V7: External precedent library + halachot ─────────────────────
|
||||
|
||||
|
||||
def _row_to_case_law(row: asyncpg.Record) -> dict:
|
||||
"""Normalize a case_law row, parsing subject_tags JSONB to list."""
|
||||
d = dict(row)
|
||||
if isinstance(d.get("subject_tags"), str):
|
||||
try:
|
||||
d["subject_tags"] = json.loads(d["subject_tags"])
|
||||
except (TypeError, ValueError):
|
||||
d["subject_tags"] = []
|
||||
if d.get("date") is not None:
|
||||
d["date"] = d["date"].isoformat()
|
||||
return d
|
||||
|
||||
|
||||
async def get_case_law(case_law_id: UUID) -> dict | None:
    """Fetch one case_law row by primary key.

    Returns the row normalized by ``_row_to_case_law``, or ``None`` when the
    query yields no row.
    """
    pool = await get_pool()
    query = "SELECT * FROM case_law WHERE id = $1"
    record = await pool.fetchrow(query, case_law_id)
    if not record:
        return None
    return _row_to_case_law(record)
|
||||
|
||||
|
||||
async def get_case_law_by_citation(case_number: str) -> dict | None:
    """Fetch one case_law row whose ``case_number`` equals the given citation.

    Returns the row normalized by ``_row_to_case_law``, or ``None`` when the
    query yields no row. The match is an exact equality test.
    """
    pool = await get_pool()
    query = "SELECT * FROM case_law WHERE case_number = $1"
    record = await pool.fetchrow(query, case_number)
    if not record:
        return None
    return _row_to_case_law(record)
|
||||
|
||||
|
||||
async def create_external_case_law(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    headnote: str = "",
    key_quote: str = "",
    source_url: str = "",
    source_type: str = "",
    precedent_level: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
) -> dict:
    """Insert or promote a chair-uploaded external precedent in case_law.

    If a row with this ``case_number`` already exists — whatever its current
    ``source_kind`` — it is updated in place and marked
    ``source_kind='external_upload'``:

    * ``case_name``, ``practice_area``, ``appeal_subtype``, ``subject_tags``,
      ``headnote``, ``full_text``, ``source_type``, ``precedent_level`` and
      ``is_binding`` are overwritten with the supplied values;
    * ``court``, ``summary``, ``key_quote`` and ``source_url`` are replaced
      only when a non-empty string is supplied; ``date`` and ``document_id``
      only when a non-None value is supplied.

    Otherwise a new row is inserted. Either way the row ends up with
    ``extraction_status='processing'`` and
    ``halacha_extraction_status='pending'``.

    The lookup and the write run in one transaction, and the existing row is
    locked with ``FOR UPDATE``, so the decision "update vs. insert" cannot be
    invalidated by a concurrent change to that row between the two statements.

    Returns:
        The resulting row, normalized by ``_row_to_case_law``.
    """
    pool = await get_pool()
    # subject_tags is stored as JSON text; Hebrew tags stay readable.
    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
    async with pool.acquire() as conn:
        async with conn.transaction():
            existing = await conn.fetchrow(
                "SELECT id FROM case_law WHERE case_number = $1 FOR UPDATE",
                case_number,
            )
            if existing:
                row = await conn.fetchrow(
                    """
                    UPDATE case_law SET
                        case_name = $2,
                        court = COALESCE(NULLIF($3, ''), court),
                        date = COALESCE($4, date),
                        practice_area = $5,
                        appeal_subtype = $6,
                        subject_tags = $7,
                        summary = COALESCE(NULLIF($8, ''), summary),
                        headnote = $9,
                        key_quote = COALESCE(NULLIF($10, ''), key_quote),
                        full_text = $11,
                        source_url = COALESCE(NULLIF($12, ''), source_url),
                        source_type = $13,
                        precedent_level = $14,
                        is_binding = $15,
                        document_id = COALESCE($16, document_id),
                        source_kind = 'external_upload',
                        extraction_status = 'processing',
                        halacha_extraction_status = 'pending'
                    WHERE id = $1
                    RETURNING *
                    """,
                    existing["id"], case_name, court, decision_date,
                    practice_area, appeal_subtype, tags_json, summary, headnote,
                    key_quote, full_text, source_url, source_type,
                    precedent_level, is_binding, document_id,
                )
            else:
                row = await conn.fetchrow(
                    """
                    INSERT INTO case_law (
                        case_number, case_name, court, date, subject_tags,
                        summary, key_quote, full_text, source_url,
                        source_kind, document_id, extraction_status,
                        halacha_extraction_status, practice_area, appeal_subtype,
                        headnote, source_type, precedent_level, is_binding
                    ) VALUES (
                        $1, $2, $3, $4, $5, $6, $7, $8, $9,
                        'external_upload', $10, 'processing', 'pending',
                        $11, $12, $13, $14, $15, $16
                    )
                    RETURNING *
                    """,
                    case_number, case_name, court, decision_date, tags_json,
                    summary, key_quote, full_text, source_url,
                    document_id, practice_area, appeal_subtype, headnote,
                    source_type, precedent_level, is_binding,
                )
    return _row_to_case_law(row)
|
||||
|
||||
|
||||
async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
    """Apply a partial metadata update to one case_law row.

    Only these keyword names are honoured; anything else is silently dropped:
    case_name, court, date, practice_area, appeal_subtype, subject_tags,
    summary, headnote, key_quote, source_url, source_type, precedent_level,
    is_binding.

    ``subject_tags`` is serialized to JSON text (a falsy value becomes ``[]``).
    When no honoured field is supplied, the row is returned as currently
    stored, without issuing an UPDATE.

    Returns:
        The updated row (normalized), or ``None`` if no row has this id.
    """
    allowed = {
        "case_name", "court", "date", "practice_area", "appeal_subtype",
        "subject_tags", "summary", "headnote", "key_quote", "source_url",
        "source_type", "precedent_level", "is_binding",
    }
    changes = {name: value for name, value in fields.items() if name in allowed}
    if not changes:
        return await get_case_law(case_law_id)

    pool = await get_pool()
    # $1 is the row id; each changed column takes the next placeholder.
    # Column names come only from the whitelist above, never from callers.
    args: list = [case_law_id]
    assignments = []
    for column, value in changes.items():
        if column == "subject_tags":
            value = json.dumps(value or [], ensure_ascii=False)
        args.append(value)
        assignments.append(f"{column} = ${len(args)}")

    sql = f"UPDATE case_law SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
    record = await pool.fetchrow(sql, *args)
    if not record:
        return None
    return _row_to_case_law(record)
|
||||
|
||||
|
||||
async def set_case_law_extraction_status(case_law_id: UUID, status: str) -> None:
    """Write ``status`` into ``extraction_status`` of the given case_law row.

    The value is stored as given; no validation is performed here.
    """
    statement = "UPDATE case_law SET extraction_status = $2 WHERE id = $1"
    pool = await get_pool()
    await pool.execute(statement, case_law_id, status)
|
||||
|
||||
|
||||
async def set_case_law_halacha_status(case_law_id: UUID, status: str) -> None:
    """Write ``status`` into ``halacha_extraction_status`` of the given row.

    The value is stored as given; no validation is performed here.
    """
    statement = "UPDATE case_law SET halacha_extraction_status = $2 WHERE id = $1"
    pool = await get_pool()
    await pool.execute(statement, case_law_id, status)
|
||||
|
||||
|
||||
async def list_external_case_law(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
    offset: int = 0,
) -> list[dict]:
    """Return chair-uploaded precedents, newest first.

    Only rows with ``source_kind = 'external_upload'`` are listed. Each
    non-empty filter narrows the result:

    * ``practice_area``, ``precedent_level``, ``source_type`` — exact match;
    * ``court`` — case-insensitive substring match;
    * ``search`` — case-insensitive substring match against case_number,
      case_name, summary or headnote.

    Every row also carries ``halachot_count`` (all halachot of the ruling)
    and ``approved_count`` (those approved or published).
    """
    pool = await get_pool()
    args: list = []

    def bind(value) -> str:
        """Register a query argument and return its ``$n`` placeholder."""
        args.append(value)
        return f"${len(args)}"

    clauses = ["source_kind = 'external_upload'"]
    if practice_area:
        clauses.append(f"practice_area = {bind(practice_area)}")
    if court:
        clauses.append(f"court ILIKE {bind(f'%{court}%')}")
    if precedent_level:
        clauses.append(f"precedent_level = {bind(precedent_level)}")
    if source_type:
        clauses.append(f"source_type = {bind(source_type)}")
    if search:
        # One placeholder, reused for all four searched columns.
        needle = bind(f"%{search}%")
        clauses.append(
            f"(case_number ILIKE {needle} OR case_name ILIKE {needle} "
            f"OR summary ILIKE {needle} OR headnote ILIKE {needle})"
        )

    where_sql = " AND ".join(clauses)
    limit_ph = bind(limit)
    offset_ph = bind(offset)
    sql = f"""
        SELECT id, case_number, case_name, court, date, practice_area,
               appeal_subtype, source_type, precedent_level, is_binding,
               summary, headnote, subject_tags, source_kind,
               extraction_status, halacha_extraction_status,
               created_at,
               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id) AS halachot_count,
               (SELECT COUNT(*) FROM halachot h WHERE h.case_law_id = case_law.id
                  AND h.review_status IN ('approved', 'published')) AS approved_count
        FROM case_law
        WHERE {where_sql}
        ORDER BY created_at DESC
        LIMIT {limit_ph} OFFSET {offset_ph}
    """
    records = await pool.fetch(sql, *args)
    return [_row_to_case_law(record) for record in records]
|
||||
|
||||
|
||||
async def delete_case_law(case_law_id: UUID) -> bool:
    """Delete one case_law row by id.

    Returns ``True`` only when the database reports exactly one deleted row.
    Removal of dependent chunks and halachot is left to the foreign keys
    (declared ``ON DELETE CASCADE`` in the V7 schema).
    """
    pool = await get_pool()
    status_tag = await pool.execute(
        "DELETE FROM case_law WHERE id = $1", case_law_id,
    )
    deleted_one = status_tag == "DELETE 1"
    return deleted_one
|
||||
|
||||
|
||||
async def store_precedent_chunks(
    case_law_id: UUID, chunks: list[dict],
) -> int:
    """Atomically replace the precedent chunks of a case_law row.

    Each chunk dict must provide ``chunk_index`` and ``content``; optional
    keys are ``section_type`` (default ``"other"``), ``page_number`` and
    ``embedding`` (both default to ``None``).

    The old chunks are deleted and the new ones inserted inside a single
    transaction, so a failure part-way leaves the previously stored chunks in
    place instead of an empty or partial set. Chunk dicts are converted to
    argument tuples before the database is touched, so a malformed chunk
    (missing required key) raises ``KeyError`` without deleting anything.

    Returns:
        The number of chunks stored (``0`` clears the ruling's chunks).
    """
    rows = [
        (
            case_law_id,
            chunk["chunk_index"],
            chunk["content"],
            chunk.get("section_type", "other"),
            chunk.get("page_number"),
            chunk.get("embedding"),
        )
        for chunk in chunks
    ]
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(
                "DELETE FROM precedent_chunks WHERE case_law_id = $1",
                case_law_id,
            )
            if rows:
                await conn.executemany(
                    """INSERT INTO precedent_chunks
                       (case_law_id, chunk_index, content, section_type,
                        page_number, embedding)
                       VALUES ($1, $2, $3, $4, $5, $6)""",
                    rows,
                )
    return len(rows)
|
||||
|
||||
|
||||
async def list_precedent_chunks(
    case_law_id: UUID,
    section_types: tuple[str, ...] | None = None,
) -> list[dict]:
    """Return the stored chunks of one ruling, ordered by ``chunk_index``.

    When ``section_types`` is non-empty, only chunks whose ``section_type`` is
    one of the given values are returned; ``None`` or an empty tuple means no
    section filter. Embeddings are not part of the result.
    """
    if section_types:
        sql = """SELECT id, chunk_index, content, section_type, page_number
                 FROM precedent_chunks
                 WHERE case_law_id = $1 AND section_type = ANY($2::text[])
                 ORDER BY chunk_index"""
        args: tuple = (case_law_id, list(section_types))
    else:
        sql = """SELECT id, chunk_index, content, section_type, page_number
                 FROM precedent_chunks
                 WHERE case_law_id = $1
                 ORDER BY chunk_index"""
        args = (case_law_id,)

    pool = await get_pool()
    records = await pool.fetch(sql, *args)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
async def delete_halachot(case_law_id: UUID) -> int:
    """Delete every halacha attached to a ruling and return how many went.

    The count is read from the last token of the command status string
    (for example ``"DELETE 5"``); ``0`` is returned if that cannot be parsed.
    """
    pool = await get_pool()
    status_tag = await pool.execute(
        "DELETE FROM halachot WHERE case_law_id = $1", case_law_id,
    )
    tokens = status_tag.split()
    if not tokens:
        return 0
    try:
        return int(tokens[-1])
    except ValueError:
        return 0
|
||||
|
||||
|
||||
async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
    """Insert extracted halachot for a ruling, all as ``pending_review``.

    Each halacha dict must provide ``rule_statement`` and
    ``supporting_quote``. Optional keys and their defaults: ``rule_type``
    (``"binding"``), ``reasoning_summary`` (``""``), ``page_reference``
    (``""``), ``practice_areas`` / ``subject_tags`` / ``cites`` (``[]``),
    ``confidence`` (``0.0``), ``quote_verified`` (``False``), ``embedding``
    (``None``). ``halacha_index`` is the item's position in the list.

    The batch is written in one transaction: either every halacha is stored
    or none is. Items are converted to argument tuples before the database is
    touched, so a malformed item raises ``KeyError`` without inserting
    anything. Existing halachot of the ruling are not removed here.

    Returns:
        The number of halachot inserted (``0`` for an empty list).
    """
    if not halachot:
        return 0
    rows = [
        (
            case_law_id,
            index,
            item["rule_statement"],
            item.get("rule_type", "binding"),
            item.get("reasoning_summary", ""),
            item["supporting_quote"],
            item.get("page_reference", ""),
            item.get("practice_areas", []),
            item.get("subject_tags", []),
            item.get("cites", []),
            item.get("confidence", 0.0),
            item.get("quote_verified", False),
            item.get("embedding"),
        )
        for index, item in enumerate(halachot)
    ]
    pool = await get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.executemany(
                """INSERT INTO halachot
                   (case_law_id, halacha_index, rule_statement, rule_type,
                    reasoning_summary, supporting_quote, page_reference,
                    practice_areas, subject_tags, cites, confidence,
                    quote_verified, embedding, review_status)
                   VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
                           $12, $13, 'pending_review')""",
                rows,
            )
    return len(rows)
|
||||
|
||||
|
||||
async def list_halachot(
    case_law_id: UUID | None = None,
    review_status: str | None = None,
    practice_area: str | None = None,
    limit: int = 200,
    offset: int = 0,
) -> list[dict]:
    """List halachot joined with basic data of their ruling.

    Optional filters (all combined with AND): the owning ``case_law_id``, an
    exact ``review_status``, and a ``practice_area`` that must be contained in
    the halacha's ``practice_areas`` array. Rows are ordered by ruling and
    then by ``halacha_index``; ``limit`` / ``offset`` page the result.

    ``decision_date`` (the ruling's date) is returned as an ISO string when
    present. The embedding column is not selected.
    """
    pool = await get_pool()
    args: list = []

    def bind(value) -> str:
        """Register a query argument and return its ``$n`` placeholder."""
        args.append(value)
        return f"${len(args)}"

    clauses = []
    if case_law_id is not None:
        clauses.append(f"h.case_law_id = {bind(case_law_id)}")
    if review_status:
        clauses.append(f"h.review_status = {bind(review_status)}")
    if practice_area:
        clauses.append(f"{bind(practice_area)} = ANY(h.practice_areas)")

    where_sql = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    limit_ph = bind(limit)
    offset_ph = bind(offset)
    sql = f"""
        SELECT h.id, h.case_law_id, h.halacha_index, h.rule_statement,
               h.rule_type, h.reasoning_summary, h.supporting_quote,
               h.page_reference, h.practice_areas, h.subject_tags,
               h.cites, h.confidence, h.quote_verified, h.review_status,
               h.reviewer, h.reviewed_at, h.created_at, h.updated_at,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level
        FROM halachot h
        LEFT JOIN case_law cl ON cl.id = h.case_law_id
        {where_sql}
        ORDER BY h.case_law_id, h.halacha_index
        LIMIT {limit_ph} OFFSET {offset_ph}
    """
    records = await pool.fetch(sql, *args)

    listing = []
    for record in records:
        item = dict(record)
        decided = item.get("decision_date")
        if decided is not None:
            item["decision_date"] = decided.isoformat()
        listing.append(item)
    return listing
|
||||
|
||||
|
||||
async def update_halacha(
    halacha_id: UUID,
    review_status: str | None = None,
    reviewer: str = "",
    rule_statement: str | None = None,
    reasoning_summary: str | None = None,
    subject_tags: list[str] | None = None,
    practice_areas: list[str] | None = None,
) -> dict | None:
    """Approve, reject or edit a single halacha.

    Only arguments that are not ``None`` are written. When ``review_status``
    is ``approved``, ``rejected`` or ``published``, ``reviewed_at`` is set to
    the current time and ``reviewer`` is stored; for any other status the
    reviewer is left untouched. ``updated_at`` is refreshed on every update.

    Returns:
        The updated row as a dict; ``None`` when nothing was requested or no
        row has this id.
    """
    # NOTE(review): the embedding column is never touched here, so editing
    # rule_statement leaves the stored vector as it was — confirm that a
    # caller re-embeds when the rule text changes.
    pool = await get_pool()
    args: list = [halacha_id]  # $1 is the row id
    assignments: list[str] = []

    def assign(column: str, value) -> None:
        """Queue ``column = $n`` with ``value`` bound to the next placeholder."""
        args.append(value)
        assignments.append(f"{column} = ${len(args)}")

    if review_status is not None:
        assign("review_status", review_status)
        if review_status in ("approved", "rejected", "published"):
            assignments.append("reviewed_at = now()")
            assign("reviewer", reviewer)

    editable = (
        ("rule_statement", rule_statement),
        ("reasoning_summary", reasoning_summary),
        ("subject_tags", subject_tags),
        ("practice_areas", practice_areas),
    )
    for column, value in editable:
        if value is not None:
            assign(column, value)

    if not assignments:
        return None

    assignments.append("updated_at = now()")
    sql = f"UPDATE halachot SET {', '.join(assignments)} WHERE id = $1 RETURNING *"
    record = await pool.fetchrow(sql, *args)
    if not record:
        return None
    return dict(record)
|
||||
|
||||
|
||||
async def search_precedent_library_semantic(
    query_embedding: list[float],
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> list[dict]:
    """Run a vector search over halachot and ruling passages and merge them.

    Two queries are issued, each ordered by cosine distance to
    ``query_embedding`` and each capped at ``limit``:

    * halachot — only rows whose ``review_status`` is ``approved`` or
      ``published``; skipped entirely when ``include_halachot`` is false.
      Their score gets a flat +0.05 boost and they are tagged
      ``type="halacha"``.
    * chunks of rulings with ``source_kind='external_upload'`` — tagged
      ``type="passage"``, visible regardless of halacha review status.

    Filters: ``court`` (substring, case-insensitive), ``precedent_level``,
    ``appeal_subtype`` and ``is_binding`` apply to the ruling in both queries.
    ``practice_area`` is tested against the halacha's ``practice_areas`` array
    and against the ruling's ``practice_area`` for chunks. ``subject_tag``
    restricts halachot only; passages are not filtered by it.

    Returns:
        The merged hits sorted by descending score, truncated to ``limit``.
        ``decision_date`` is an ISO string when present.
    """
    pool = await get_pool()

    # $1 = query vector, $2 = per-query row cap; filters start at $3.
    halacha_args: list = [query_embedding, limit]
    chunk_args: list = [query_embedding, limit]
    halacha_where = ["h.review_status IN ('approved', 'published')"]
    chunk_where = ["cl.source_kind = 'external_upload'"]

    def bind_halacha(value) -> str:
        halacha_args.append(value)
        return f"${len(halacha_args)}"

    def bind_chunk(value) -> str:
        chunk_args.append(value)
        return f"${len(chunk_args)}"

    if practice_area:
        halacha_where.append(f"{bind_halacha(practice_area)} = ANY(h.practice_areas)")
        chunk_where.append(f"cl.practice_area = {bind_chunk(practice_area)}")

    # Ruling-level filters have the same SQL shape in both queries.
    ruling_filters: list[tuple[str, object]] = []
    if court:
        ruling_filters.append(("cl.court ILIKE", f"%{court}%"))
    if precedent_level:
        ruling_filters.append(("cl.precedent_level =", precedent_level))
    if appeal_subtype:
        ruling_filters.append(("cl.appeal_subtype =", appeal_subtype))
    if is_binding is not None:
        ruling_filters.append(("cl.is_binding =", is_binding))
    for lhs, value in ruling_filters:
        halacha_where.append(f"{lhs} {bind_halacha(value)}")
        chunk_where.append(f"{lhs} {bind_chunk(value)}")

    if subject_tag:
        halacha_where.append(f"{bind_halacha(subject_tag)} = ANY(h.subject_tags)")

    halacha_sql = f"""
        SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
               h.reasoning_summary, h.supporting_quote, h.page_reference,
               h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level,
               1 - (h.embedding <=> $1) AS score
        FROM halachot h
        JOIN case_law cl ON cl.id = h.case_law_id
        WHERE {' AND '.join(halacha_where)}
          AND h.embedding IS NOT NULL
        ORDER BY h.embedding <=> $1
        LIMIT $2
    """

    chunk_sql = f"""
        SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
               pc.section_type, pc.page_number,
               cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
               cl.precedent_level, cl.practice_area,
               1 - (pc.embedding <=> $1) AS score
        FROM precedent_chunks pc
        JOIN case_law cl ON cl.id = pc.case_law_id
        WHERE {' AND '.join(chunk_where)}
          AND pc.embedding IS NOT NULL
        ORDER BY pc.embedding <=> $1
        LIMIT $2
    """

    def as_hit(record, kind: str, boost: float) -> dict:
        """Turn a result row into a plain dict with a float score and a type tag."""
        hit = dict(record)
        if hit.get("decision_date") is not None:
            hit["decision_date"] = hit["decision_date"].isoformat()
        hit["score"] = float(hit["score"]) + boost if boost else float(hit["score"])
        hit["type"] = kind
        return hit

    hits: list[dict] = []
    if include_halachot:
        halacha_rows = await pool.fetch(halacha_sql, *halacha_args)
        # Rule-level boost: distilled rules outrank raw passages on near ties.
        hits.extend(as_hit(record, "halacha", 0.05) for record in halacha_rows)

    chunk_rows = await pool.fetch(chunk_sql, *chunk_args)
    hits.extend(as_hit(record, "passage", 0.0) for record in chunk_rows)

    ranked = sorted(hits, key=lambda hit: hit["score"], reverse=True)
    return ranked[:limit]
|
||||
|
||||
|
||||
async def precedent_library_stats() -> dict:
    """Collect the counters displayed on the /precedents statistics tab.

    Runs six aggregate queries on one pooled connection: the number of
    ``external_upload`` precedents, their breakdown by practice area and by
    precedent level, and the total / pending / approved-or-published
    halachot counts.

    Returns:
        A dict with the keys ``precedents_total``, ``by_practice_area``,
        ``by_precedent_level``, ``halachot_total``, ``halachot_pending`` and
        ``halachot_approved``. Scalar counts fall back to 0 when the query
        yields no value.
    """

    def _breakdown(rows, column: str) -> list[dict]:
        # One {column: value, "count": n} entry per GROUP BY row, in query order.
        return [{column: row[column], "count": int(row["n"])} for row in rows]

    pool = await get_pool()
    async with pool.acquire() as conn:
        precedent_count = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'external_upload'"
        )
        practice_rows = await conn.fetch(
            """SELECT practice_area, COUNT(*) AS n
               FROM case_law
               WHERE source_kind = 'external_upload'
               GROUP BY practice_area
               ORDER BY n DESC"""
        )
        level_rows = await conn.fetch(
            """SELECT precedent_level, COUNT(*) AS n
               FROM case_law
               WHERE source_kind = 'external_upload'
               GROUP BY precedent_level
               ORDER BY n DESC"""
        )
        rule_count = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot"
        )
        pending_count = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review'"
        )
        approved_count = await conn.fetchval(
            "SELECT COUNT(*) FROM halachot WHERE review_status IN ('approved', 'published')"
        )

    stats: dict = {}
    stats["precedents_total"] = int(precedent_count or 0)
    stats["by_practice_area"] = _breakdown(practice_rows, "practice_area")
    stats["by_precedent_level"] = _breakdown(level_rows, "precedent_level")
    stats["halachot_total"] = int(rule_count or 0)
    stats["halachot_pending"] = int(pending_count or 0)
    stats["halachot_approved"] = int(approved_count or 0)
    return stats
|
||||
|
||||
326
mcp-server/src/legal_mcp/services/halacha_extractor.py
Normal file
326
mcp-server/src/legal_mcp/services/halacha_extractor.py
Normal file
@@ -0,0 +1,326 @@
|
||||
"""Extract binding legal rules (הלכות) from external court rulings.
|
||||
|
||||
Runs Claude (via the local headless ``claude -p`` bridge) over the
|
||||
legal_analysis / ruling / conclusion chunks of a precedent, returns a
|
||||
structured list of halachot, validates each one against the source text,
|
||||
embeds the rule statement, and stores everything as ``pending_review`` in
|
||||
the ``halachot`` table.
|
||||
|
||||
All extraction is idempotent — calling ``extract(case_law_id)`` twice
|
||||
deletes prior rows for that precedent first.
|
||||
|
||||
Trust model:
|
||||
Per chair decision, NO halacha is auto-published. Every extracted
|
||||
halacha enters with ``review_status='pending_review'``. The chair
|
||||
approves/rejects via the UI, and only ``approved`` (or ``published``)
|
||||
rows are visible to ``search_precedent_library`` and the writing
|
||||
agents.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.config import parse_llm_json
|
||||
from legal_mcp.services import claude_session, db, embeddings, proofreader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Same concurrency model as claims_extractor: every ``claude -p`` subprocess
# holds roughly 300 MB RSS, so the number of chunks processed in parallel is
# capped to keep the host healthy.
CHUNK_CONCURRENCY = 3

# Extra attempts per chunk after the first one fails.
CHUNK_RETRY_ATTEMPTS = 1

# Section types that are fed to the extractor. The facts / intro /
# appellant_claims / respondent_claims sections carry positions rather than
# holdings, so they are skipped.
EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
|
||||
|
||||
|
||||
HALACHA_EXTRACTION_PROMPT = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית.
|
||||
|
||||
## הגדרות מחייבות
|
||||
|
||||
הלכה (binding rule) = כלל משפטי שהפסק קובע או מאמץ ומיישם, באופן שניתן להסתמך עליו בהחלטות עתידיות.
|
||||
|
||||
לא-הלכה (אין לחלץ):
|
||||
- אמרת אגב (obiter dicta) — הערות שאינן הכרחיות להכרעה.
|
||||
- ממצאים עובדתיים ספציפיים לתיק ("העורר לא הוכיח X").
|
||||
- ציטוטי הלכות מפסקי דין אחרים שלא אומצו במפורש בפסק זה.
|
||||
- הצהרות על דין קיים שאינן מיושמות בהכרעה.
|
||||
|
||||
הבחנה קריטית: כאשר הפסק מצטט הלכה מפסק קודם, חלץ אותה רק אם בית המשפט בפסק הנוכחי **מאמץ ומחיל** אותה (לא רק מזכיר אותה ברקע).
|
||||
|
||||
## תחומים אפשריים (practice_areas) — תחומי ועדת הערר בלבד
|
||||
- rishuy_uvniya — רישוי ובניה (תיקי 1xxx: היתרים, שימוש חורג, תכניות, קווי בניין, גובה, חניה)
|
||||
- betterment_levy — היטל השבחה (תיקי 8xxx: שומה, מערכות, תכניות המקנות בה, מועד קובע, סופיות ההחלטה)
|
||||
- compensation_197 — פיצויים לפי ס' 197 (תיקי 9xxx: פגיעה במקרקעין, ירידת ערך, ס' 200/פטור)
|
||||
|
||||
הלכה אחת יכולה לחול על כמה תחומים — practice_areas הוא array ולא string יחיד.
|
||||
|
||||
## סוגי הלכה (rule_type)
|
||||
- binding — הלכה מחייבת שהוחלה על התיק.
|
||||
- interpretive — פרשנות סעיף חוק/תכנית שאומצה.
|
||||
- procedural — כלל פרוצדורלי (סמכות, מועדים, הליכי שמיעה).
|
||||
- obiter — אמרת אגב חשובה (חלץ רק אם משמעותית; סמן confidence נמוך).
|
||||
|
||||
## פלט נדרש
|
||||
החזר JSON array בלבד, ללא markdown, ללא הסברים. דוגמה:
|
||||
[
|
||||
{
|
||||
"rule_statement": "ניסוח הכלל בלשון משפטית מדויקת בגוף שלישי, 1-3 משפטים.",
|
||||
"rule_type": "binding",
|
||||
"reasoning_summary": "תמצית ההיגיון: למה בית המשפט הגיע לכלל הזה (1-2 משפטים).",
|
||||
"supporting_quote": "ציטוט מילולי מדויק מהפסק התומך בכלל. חייב להופיע מילה במילה בטקסט הקלט.",
|
||||
"page_reference": "פס' 12 / עמ' 8 — ככל שניתן לזהות מהקלט.",
|
||||
"practice_areas": ["betterment_levy"],
|
||||
"subject_tags": ["מועד_קביעת_שומה", "סופיות_ההחלטה"],
|
||||
"cites": ["עע\\"מ 3975/22"],
|
||||
"confidence": 0.85
|
||||
}
|
||||
]
|
||||
|
||||
## כללי איכות
|
||||
1. **נאמנות מוחלטת לציטוט** — supporting_quote חייב להיות הדבקה מדויקת מהקלט. אם אין ציטוט מתאים — אל תמציא הלכה.
|
||||
2. **מספר הלכות** — פסק רגיל מכיל 1-4 הלכות מחייבות. אל תמתח את הרשימה. אם אין הלכה — החזר [].
|
||||
3. **לא לפצל יתר על המידה** — אם שני סעיפים מבטאים את אותו עיקרון, אחד את הניסוח.
|
||||
4. **שפה** — rule_statement בעברית משפטית מקצועית, לא צמצום מילולי של הציטוט.
|
||||
5. **subject_tags** — 2-5 תגיות בעברית, snake_case (חניה, קווי_בניין, שיקול_דעת, פגם_פרוצדורלי, סמכות, מועדים, פגיעה_במקרקעין, ירידת_ערך).
|
||||
6. **confidence** — 0..1. מתחת ל-0.7 = ספק לגבי היות זה הלכה מחייבת.
|
||||
"""
|
||||
|
||||
|
||||
_VALID_PRACTICE_AREAS = {"rishuy_uvniya", "betterment_levy", "compensation_197"}
|
||||
_VALID_RULE_TYPES = {"binding", "interpretive", "procedural", "obiter"}
|
||||
|
||||
|
||||
def _normalize_for_comparison(text: str) -> str:
    """Canonicalize text so two renderings of one passage compare equal.

    Quote marks are unified first through ``proofreader._fix_hebrew_quotes``
    (reused so this stays consistent with the proofreader pipeline); then
    every run of whitespace — newlines, tabs, repeated spaces — is collapsed
    to a single space and the result is trimmed.

    Args:
        text: Raw text, either an LLM-supplied quote or the source document.

    Returns:
        The normalized string.
    """
    unified_quotes = proofreader._fix_hebrew_quotes(text)
    single_spaced = re.sub(r"\s+", " ", unified_quotes)
    return single_spaced.strip()
|
||||
|
||||
|
||||
def _verify_quote(supporting_quote: str, full_text: str) -> bool:
    """Check whether ``supporting_quote`` occurs in ``full_text``.

    Both strings are passed through ``_normalize_for_comparison`` before
    matching. An exact substring hit succeeds immediately. Otherwise, for
    normalized quotes of at least 30 characters, a margin of
    ``max(2, len // 20)`` characters is cut from *both* ends and the
    remaining centre (roughly the inner 90%) is searched for instead; this
    tolerates the LLM altering a few characters at the quote boundaries.

    Args:
        supporting_quote: Quote returned by the LLM.
        full_text: Full text of the precedent the quote should come from.

    Returns:
        True when the quote (or its trimmed centre) is found, False for an
        empty quote or when no match exists.
    """
    if not supporting_quote.strip():
        return False

    needle = _normalize_for_comparison(supporting_quote)
    haystack = _normalize_for_comparison(full_text)
    if not needle:
        return False
    if needle in haystack:
        return True

    # Short quotes get no tolerance: trimming them would leave too little
    # text for a meaningful match.
    if len(needle) < 30:
        return False

    margin = max(2, len(needle) // 20)
    core = needle[margin:-margin]
    return bool(core) and core in haystack
|
||||
|
||||
|
||||
def _coerce_halacha(raw: dict) -> dict | None:
|
||||
"""Validate and normalize one LLM-returned halacha dict.
|
||||
|
||||
Returns ``None`` if the entry is missing required fields.
|
||||
"""
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
rule_statement = (raw.get("rule_statement") or "").strip()
|
||||
supporting_quote = (raw.get("supporting_quote") or "").strip()
|
||||
if not rule_statement or not supporting_quote:
|
||||
return None
|
||||
|
||||
rule_type = (raw.get("rule_type") or "binding").strip().lower()
|
||||
if rule_type not in _VALID_RULE_TYPES:
|
||||
rule_type = "binding"
|
||||
|
||||
practice_areas_raw = raw.get("practice_areas") or []
|
||||
if isinstance(practice_areas_raw, str):
|
||||
practice_areas_raw = [practice_areas_raw]
|
||||
practice_areas = [p for p in practice_areas_raw if p in _VALID_PRACTICE_AREAS]
|
||||
|
||||
subject_tags_raw = raw.get("subject_tags") or []
|
||||
if isinstance(subject_tags_raw, str):
|
||||
subject_tags_raw = [subject_tags_raw]
|
||||
subject_tags = [str(t).strip() for t in subject_tags_raw if str(t).strip()]
|
||||
|
||||
cites_raw = raw.get("cites") or []
|
||||
if isinstance(cites_raw, str):
|
||||
cites_raw = [cites_raw]
|
||||
cites = [str(c).strip() for c in cites_raw if str(c).strip()]
|
||||
|
||||
try:
|
||||
confidence = float(raw.get("confidence", 0.0))
|
||||
except (TypeError, ValueError):
|
||||
confidence = 0.0
|
||||
confidence = max(0.0, min(1.0, confidence))
|
||||
|
||||
return {
|
||||
"rule_statement": rule_statement,
|
||||
"rule_type": rule_type,
|
||||
"reasoning_summary": (raw.get("reasoning_summary") or "").strip(),
|
||||
"supporting_quote": supporting_quote,
|
||||
"page_reference": (raw.get("page_reference") or "").strip(),
|
||||
"practice_areas": practice_areas,
|
||||
"subject_tags": subject_tags,
|
||||
"cites": cites,
|
||||
"confidence": confidence,
|
||||
}
|
||||
|
||||
|
||||
async def _extract_chunk(
    chunk_text: str,
    section_type: str,
    chunk_index: int,
    chunk_total: int,
    context: str,
) -> list[dict]:
    """Run the halacha extraction prompt on one chunk, retrying on failure.

    The prompt is ``HALACHA_EXTRACTION_PROMPT`` followed by the section type,
    the source context line (with a part label when the precedent has more
    than one chunk) and the chunk text between explicit start/end markers.

    Args:
        chunk_text: Text of the chunk to analyse.
        section_type: Section type of the chunk, echoed into the prompt.
        chunk_index: Zero-based position of the chunk (shown one-based).
        chunk_total: Total number of chunks being processed.
        context: Human-readable source line placed in the prompt.

    Returns:
        The list returned by ``claude_session.query_json``, or ``[]`` when
        all ``CHUNK_RETRY_ATTEMPTS + 1`` attempts raised or returned
        something other than a list. Failures are logged, never raised.
    """
    chunk_label = f" (חלק {chunk_index + 1}/{chunk_total})" if chunk_total > 1 else ""
    prompt = (
        f"{HALACHA_EXTRACTION_PROMPT}\n\n"
        f"## הקלט\n"
        f"סוג קטע: {section_type}\n"
        f"{context}{chunk_label}\n\n"
        f"--- תחילת הטקסט ---\n{chunk_text}\n--- סוף הטקסט ---"
    )

    # Reason for the most recent failed attempt, of either kind, so the final
    # error log never reports a bare "None".
    failure = "no attempt made"
    for attempt in range(CHUNK_RETRY_ATTEMPTS + 1):
        try:
            result = await claude_session.query_json(prompt)
        except Exception as e:
            failure = f"{type(e).__name__}: {e}"
            logger.warning(
                "halacha_extractor chunk %d/%d attempt %d raised: %s",
                chunk_index + 1, chunk_total, attempt + 1, e,
            )
            continue
        if isinstance(result, list):
            return result
        failure = f"non-list response ({type(result).__name__})"
        logger.warning(
            "halacha_extractor chunk %d/%d attempt %d returned non-list (%s)",
            chunk_index + 1, chunk_total, attempt + 1, type(result).__name__,
        )

    logger.error(
        "halacha_extractor chunk %d/%d failed after %d attempts: %s",
        chunk_index + 1, chunk_total, CHUNK_RETRY_ATTEMPTS + 1, failure,
    )
    return []
|
||||
|
||||
|
||||
async def _extract_and_store(case_law_id: UUID, record: dict, chunks: list) -> dict:
    """Replace the stored halachot of one precedent with a fresh extraction.

    Deletes existing rows, runs ``_extract_chunk`` over *chunks* with bounded
    concurrency, coerces and quote-checks the results, embeds them, stores
    them, and marks the halacha status ``completed``.
    """
    await db.delete_halachot(case_law_id)

    citation = record.get("case_number", "")
    court = record.get("court", "")
    date_str = str(record.get("date") or "")
    context = f"מקור: {citation} — {court}, {date_str}"

    sem = asyncio.Semaphore(CHUNK_CONCURRENCY)

    async def _bounded(idx: int, chunk_row: dict) -> list[dict]:
        async with sem:
            return await _extract_chunk(
                chunk_row["content"], chunk_row["section_type"],
                idx, len(chunks), context,
            )

    chunk_results = await asyncio.gather(
        *[_bounded(i, c) for i, c in enumerate(chunks)]
    )
    raw_halachot: list[dict] = [item for items in chunk_results for item in items]

    if not raw_halachot:
        await db.set_case_law_halacha_status(case_law_id, "completed")
        return {"status": "no_halachot", "extracted": 0, "stored": 0}

    # The quote check runs against the full text of the precedent, not just
    # the chunk the halacha came from.
    full_text = record.get("full_text") or ""

    cleaned: list[dict] = []
    for raw in raw_halachot:
        coerced = _coerce_halacha(raw)
        if coerced is None:
            continue
        coerced["quote_verified"] = _verify_quote(
            coerced["supporting_quote"], full_text,
        )
        cleaned.append(coerced)

    if not cleaned:
        await db.set_case_law_halacha_status(case_law_id, "completed")
        return {"status": "no_valid_halachot", "extracted": len(raw_halachot), "stored": 0}

    # Embed rule_statement + reasoning_summary so semantic search hits the
    # rule directly rather than the surrounding chunk centroid.
    embed_inputs = [
        f"{h['rule_statement']} — {h['reasoning_summary']}".strip(" —")
        for h in cleaned
    ]
    try:
        vectors = list(
            await embeddings.embed_texts(embed_inputs, input_type="document")
        )
    except Exception as e:
        # Best effort: the halachot are still stored, just without embeddings.
        logger.error("halacha_extractor: embeddings failed: %s", e)
        vectors = [None] * len(cleaned)
    if len(vectors) != len(cleaned):
        # zip() would silently truncate and could pair a rule with the wrong
        # vector; storing no embeddings is the safer outcome.
        logger.error(
            "halacha_extractor: got %d embeddings for %d halachot; storing without embeddings",
            len(vectors), len(cleaned),
        )
        vectors = [None] * len(cleaned)

    for halacha, vec in zip(cleaned, vectors):
        halacha["embedding"] = vec

    stored = await db.store_halachot(case_law_id, cleaned)

    verified = sum(1 for h in cleaned if h["quote_verified"])
    await db.set_case_law_halacha_status(case_law_id, "completed")

    logger.info(
        "halacha_extractor: case_law=%s extracted=%d cleaned=%d verified=%d stored=%d",
        case_law_id, len(raw_halachot), len(cleaned), verified, stored,
    )
    return {
        "status": "completed",
        "extracted": len(raw_halachot),
        "valid": len(cleaned),
        "verified": verified,
        "stored": stored,
    }


async def extract(case_law_id: UUID | str) -> dict:
    """Extract halachot from an uploaded precedent and store them.

    Replaces any halachot previously stored for this ``case_law_id``, so the
    call can be repeated. Only chunks whose section type is in
    ``EXTRACTABLE_SECTIONS`` are analysed. Each stored halacha carries a
    ``quote_verified`` flag from ``_verify_quote``; unverified halachot are
    stored too.

    Args:
        case_law_id: Precedent id, as a ``UUID`` or its string form.

    Returns:
        A dict whose ``status`` is one of ``not_found``, ``no_chunks``,
        ``no_halachot``, ``no_valid_halachot`` or ``completed``. ``extracted``
        and ``stored`` are always present; ``valid`` and ``verified`` only on
        ``completed``.

    Raises:
        ValueError: If a string ``case_law_id`` is not a valid UUID.
        Exception: Any error from the database or extraction pipeline is
            re-raised after the failure has been recorded.
    """
    if isinstance(case_law_id, str):
        case_law_id = UUID(case_law_id)

    record = await db.get_case_law(case_law_id)
    if not record:
        return {"status": "not_found", "extracted": 0, "stored": 0}

    chunks = await db.list_precedent_chunks(
        case_law_id, section_types=EXTRACTABLE_SECTIONS,
    )
    if not chunks:
        await db.set_case_law_halacha_status(case_law_id, "completed")
        return {"status": "no_chunks", "extracted": 0, "stored": 0}

    await db.set_case_law_halacha_status(case_law_id, "processing")
    try:
        return await _extract_and_store(case_law_id, record, chunks)
    except Exception:
        logger.exception(
            "halacha_extractor: extraction failed for case_law=%s", case_law_id,
        )
        # Without this the row would stay in "processing" indefinitely.
        # NOTE(review): assumes "failed" is an accepted halacha status value —
        # confirm against the schema.
        try:
            await db.set_case_law_halacha_status(case_law_id, "failed")
        except Exception:
            # Never let the bookkeeping error mask the original failure.
            logger.exception(
                "halacha_extractor: could not record failure for case_law=%s",
                case_law_id,
            )
        raise
|
||||
309
mcp-server/src/legal_mcp/services/precedent_library.py
Normal file
309
mcp-server/src/legal_mcp/services/precedent_library.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""Orchestrator for the External Precedent Library.
|
||||
|
||||
Ingest pipeline (one upload):
|
||||
file → extract_text → proofread → INSERT case_law (source_kind='external_upload')
|
||||
→ chunk → embed → store precedent_chunks
|
||||
→ halacha_extractor.extract → embed halachot → store halachot
|
||||
→ set extraction_status='completed'
|
||||
|
||||
Progress is reported via a caller-supplied async callback so the
|
||||
web layer can pipe updates into the existing Redis ProgressStore /
|
||||
SSE plumbing without this module knowing about Redis.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Awaitable, Callable
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import (
|
||||
chunker,
|
||||
db,
|
||||
embeddings,
|
||||
extractor,
|
||||
halacha_extractor,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Async progress hook supplied by the caller: (status, percent, message).
ProgressCb = Callable[[str, int, str], Awaitable[None]]


# Root directory under which uploaded precedent files are copied.
PRECEDENT_LIBRARY_DIR = Path(config.DATA_DIR) / "precedent-library"


# Accepted metadata values. The empty string, which is the parameter default
# on the ingest entry point, is allowed in every set.
_VALID_PRACTICE_AREAS = {
    "",
    "rishuy_uvniya",
    "betterment_levy",
    "compensation_197",
}
_VALID_SOURCE_TYPES = {
    "",
    "court_ruling",
    "appeals_committee",
}
# Precedent levels are listed in both their Hebrew and English spellings.
_VALID_PRECEDENT_LEVELS = {
    "", "עליון", "מנהלי", "ועדת_ערר_ארצית", "ועדת_ערר_מחוזית",
    "supreme", "administrative", "national_appeals_committee", "district_appeals_committee",
}
|
||||
|
||||
|
||||
async def _noop_progress(_status: str, _percent: int, _msg: str) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def _safe_filename(name: str) -> str:
|
||||
"""Strip path separators and unsafe chars from a user-provided name."""
|
||||
base = Path(name).name
|
||||
return re.sub(r"[^\w.\-+א-ת ]", "_", base) or f"upload-{uuid4().hex[:8]}"
|
||||
|
||||
|
||||
def _stage_file(src_path: Path, source_type: str) -> Path:
    """Copy an uploaded file into ``PRECEDENT_LIBRARY_DIR/<bucket>/``.

    The bucket is *source_type* when it is ``court_ruling`` or
    ``appeals_committee`` and ``other`` for anything else. The copy is named
    ``<8 hex chars>_<sanitized original name>``. The source file itself is
    left in place; removing it is up to the caller.

    Args:
        src_path: Path of the uploaded file.
        source_type: Source type used to pick the destination sub-directory.

    Returns:
        Path of the newly created copy.
    """
    known_buckets = ("court_ruling", "appeals_committee")
    bucket = source_type if source_type in known_buckets else "other"

    target_dir = PRECEDENT_LIBRARY_DIR / bucket
    target_dir.mkdir(parents=True, exist_ok=True)

    cleaned_name = _safe_filename(src_path.name)
    target = target_dir / f"{uuid4().hex[:8]}_{cleaned_name}"
    shutil.copy2(src_path, target)
    return target
|
||||
|
||||
|
||||
def _coerce_date(value) -> date | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
if isinstance(value, date):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
return date.fromisoformat(value[:10])
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
async def ingest_precedent(
    *,
    file_path: str | Path,
    citation: str,
    case_name: str = "",
    court: str = "",
    decision_date=None,
    source_type: str = "",
    precedent_level: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    is_binding: bool = True,
    headnote: str = "",
    summary: str = "",
    document_id: UUID | None = None,
    progress: ProgressCb | None = None,
) -> dict:
    """Ingest a single uploaded precedent through the full pipeline.

    Steps: copy the file into the library directory, extract and clean its
    text, insert the ``case_law`` row, chunk and embed the text, store the
    chunks, then run the halacha extractor. Each stage is reported through
    *progress*.

    Args:
        file_path: Path of the uploaded file (required).
        citation: Citation stored as the case number (required, non-blank).
        case_name: Short name; defaults to the citation when blank.
        court: Court name.
        decision_date: ``date`` or ISO string; unparseable values become
            ``None``.
        source_type: One of ``_VALID_SOURCE_TYPES``.
        precedent_level: Precedent level; an unrecognised value is logged
            but still stored.
        practice_area: One of ``_VALID_PRACTICE_AREAS``.
        appeal_subtype: Free-form appeal sub-type.
        subject_tags: Subject tags for the precedent.
        is_binding: Whether the precedent is binding.
        headnote: Headnote text.
        summary: Summary text.
        document_id: Optional id of a related document row.
        progress: Async ``(status, percent, message)`` callback; a no-op is
            used when omitted.

    Returns:
        ``{"status": "completed", "case_law_id": ..., "chunks": N,
        "halachot": M}``, plus ``halachot_extracted_raw``,
        ``halachot_verified`` and ``pages`` when chunks were produced.

    Raises:
        FileNotFoundError: If *file_path* is not an existing file.
        ValueError: On a blank citation, an invalid ``practice_area`` or
            ``source_type``, or a file with no extractable text.
        Exception: Any pipeline error is re-raised after being reported via
            *progress* and, once the row exists, recorded as ``failed``.
    """
    progress = progress or _noop_progress
    src = Path(file_path)
    if not src.is_file():
        raise FileNotFoundError(f"file not found: {src}")
    if not citation.strip():
        raise ValueError("citation is required")
    if practice_area not in _VALID_PRACTICE_AREAS:
        raise ValueError(f"invalid practice_area: {practice_area!r}")
    if source_type not in _VALID_SOURCE_TYPES:
        raise ValueError(f"invalid source_type: {source_type!r}")
    if precedent_level not in _VALID_PRECEDENT_LEVELS:
        # Deliberately not an error: rejecting here could break callers that
        # already send other values. The warning makes such values visible.
        logger.warning(
            "precedent_library.ingest_precedent: unrecognized precedent_level %r",
            precedent_level,
        )

    await progress("staging", 5, "מעתיק את הקובץ לאחסון")

    staged = _stage_file(src, source_type)

    try:
        await progress("extracting", 15, "מחלץ טקסט מהקובץ")
        try:
            text, page_count = await extractor.extract_text(str(staged))
        except Exception as e:
            await progress("failed", 100, f"כשל בחילוץ טקסט: {e}")
            raise

        text = (text or "").strip()
        if not text:
            await progress("failed", 100, "לא נמצא טקסט בקובץ")
            raise ValueError("no extractable text in file")

        # Strip any Nevo preamble that might wrap court rulings downloaded from Nevo.
        text = extractor.strip_nevo_preamble(text)

        await progress("storing_metadata", 25, "שומר את הפסיקה במסד הנתונים")
        record = await db.create_external_case_law(
            case_number=citation.strip(),
            case_name=case_name.strip() or citation.strip(),
            full_text=text,
            court=court.strip(),
            decision_date=_coerce_date(decision_date),
            practice_area=practice_area,
            appeal_subtype=appeal_subtype.strip(),
            subject_tags=list(subject_tags or []),
            summary=summary.strip(),
            headnote=headnote.strip(),
            source_type=source_type,
            precedent_level=precedent_level,
            is_binding=is_binding,
            document_id=document_id,
        )
    except Exception:
        # No case_law row was created for this upload, so the staged copy
        # would be an orphan in the library directory; remove it.
        try:
            staged.unlink(missing_ok=True)
        except OSError as cleanup_err:
            logger.warning(
                "precedent_library.ingest_precedent: could not remove staged file %s: %s",
                staged, cleanup_err,
            )
        raise

    case_law_id = UUID(str(record["id"]))

    try:
        await progress("chunking", 40, f"מחלק את הטקסט ל-chunks ({page_count} עמ')")
        chunks = chunker.chunk_document(text)
        if not chunks:
            await db.set_case_law_extraction_status(case_law_id, "completed")
            await db.set_case_law_halacha_status(case_law_id, "completed")
            await progress("completed", 100, "אין טקסט לעיבוד")
            return {
                "status": "completed",
                "case_law_id": str(case_law_id),
                "chunks": 0,
                "halachot": 0,
            }

        await progress("embedding", 55, f"מייצר embeddings ל-{len(chunks)} chunks")
        chunk_texts = [c.content for c in chunks]
        chunk_vectors = list(
            await embeddings.embed_texts(chunk_texts, input_type="document")
        )
        if len(chunk_vectors) != len(chunks):
            # zip() below would silently drop the unmatched chunks.
            raise RuntimeError(
                f"embedding count mismatch: {len(chunk_vectors)} vectors "
                f"for {len(chunks)} chunks"
            )

        chunk_dicts = [
            {
                "chunk_index": c.chunk_index,
                "content": c.content,
                "section_type": c.section_type,
                "page_number": c.page_number,
                "embedding": v,
            }
            for c, v in zip(chunks, chunk_vectors)
        ]
        stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts)

        await progress("extracting_halachot", 75, "מחלץ הלכות מחייבות")
        await db.set_case_law_extraction_status(case_law_id, "completed")
        halacha_result = await halacha_extractor.extract(case_law_id)

        await progress(
            "completed",
            100,
            f"הוכנס לספרייה: {stored_chunks} chunks, "
            f"{halacha_result.get('stored', 0)} הלכות ממתינות לאישור",
        )

        return {
            "status": "completed",
            "case_law_id": str(case_law_id),
            "chunks": stored_chunks,
            "halachot": halacha_result.get("stored", 0),
            "halachot_extracted_raw": halacha_result.get("extracted", 0),
            "halachot_verified": halacha_result.get("verified", 0),
            "pages": page_count,
        }

    except Exception as e:
        logger.exception("precedent_library.ingest_precedent failed: %s", e)
        await db.set_case_law_extraction_status(case_law_id, "failed")
        await progress("failed", 100, f"כשל בעיבוד: {e}")
        raise
|
||||
|
||||
|
||||
async def reextract_halachot(
    case_law_id: UUID | str,
    progress: ProgressCb | None = None,
) -> dict:
    """Re-run the halacha extractor on an existing library precedent.

    Args:
        case_law_id: Precedent id, as a ``UUID`` or its string form.
        progress: Async ``(status, percent, message)`` callback; a no-op is
            used when omitted.

    Returns:
        The result dict of ``halacha_extractor.extract``.

    Raises:
        ValueError: If the precedent does not exist or its ``source_kind``
            is not ``external_upload``.
        Exception: Extraction errors are re-raised after a ``failed``
            progress event has been emitted.
    """
    progress = progress or _noop_progress
    if isinstance(case_law_id, str):
        case_law_id = UUID(case_law_id)

    record = await db.get_case_law(case_law_id)
    if not record or record.get("source_kind") != "external_upload":
        raise ValueError("precedent not found or not chair-uploaded")

    await progress("extracting_halachot", 50, "מחלץ הלכות מחדש")
    try:
        result = await halacha_extractor.extract(case_law_id)
    except Exception as e:
        # Emit a terminal event so a progress listener is not left waiting,
        # matching what the ingest pipeline does on failure.
        logger.exception("precedent_library.reextract_halachot failed: %s", e)
        await progress("failed", 100, f"כשל בעיבוד: {e}")
        raise
    await progress(
        "completed",
        100,
        f"הופקו {result.get('stored', 0)} הלכות (ממתינות לאישור)",
    )
    return result
|
||||
|
||||
|
||||
async def delete_precedent(case_law_id: UUID | str) -> bool:
    """Delete a precedent via ``db.delete_case_law``.

    Per the original note, the deletion cascades to the precedent's chunks
    and halachot.

    Args:
        case_law_id: Precedent id, as a ``UUID`` or its string form.

    Returns:
        Whatever ``db.delete_case_law`` reports for the deletion.
    """
    target_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
    return await db.delete_case_law(target_id)
|
||||
|
||||
|
||||
async def get_precedent(case_law_id: UUID | str) -> dict | None:
    """Fetch one precedent together with its halachot.

    Args:
        case_law_id: Precedent id, as a ``UUID`` or its string form.

    Returns:
        The ``case_law`` record with an added ``halachot`` key holding up to
        500 halachot of that precedent, or ``None`` when no record exists.
    """
    target_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id

    precedent = await db.get_case_law(target_id)
    if precedent:
        precedent["halachot"] = await db.list_halachot(
            case_law_id=target_id, limit=500,
        )
        return precedent
    return None
|
||||
|
||||
|
||||
async def list_precedents(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
    offset: int = 0,
) -> list[dict]:
    """List library precedents, forwarding all filters to the database layer.

    Every argument is passed unchanged to ``db.list_external_case_law``.

    Returns:
        The rows returned by ``db.list_external_case_law``.
    """
    filters = {
        "practice_area": practice_area,
        "court": court,
        "precedent_level": precedent_level,
        "source_type": source_type,
        "search": search,
    }
    return await db.list_external_case_law(**filters, limit=limit, offset=offset)
|
||||
|
||||
|
||||
async def search_library(
    query: str,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> list[dict]:
    """Semantic search across halachot (rule level) and chunks (passage level).

    The query is embedded with ``embeddings.embed_query`` and handed, with
    all filters, to ``db.search_precedent_library_semantic``. Per the
    chair-review policy only ``approved`` / ``published`` halachot are
    returned; chunks are returned regardless of halacha review status.

    Args:
        query: Free-text search query.
        practice_area: Practice-area filter.
        court: Court filter.
        precedent_level: Precedent-level filter.
        appeal_subtype: Appeal sub-type filter.
        is_binding: Binding filter; ``None`` disables it.
        subject_tag: Subject-tag filter.
        limit: Maximum number of results.
        include_halachot: Whether halachot are included alongside passages.

    Returns:
        The merged result list, or ``[]`` for a blank query.
    """
    if query.strip() == "":
        return []

    embedded_query = await embeddings.embed_query(query)
    filters = {
        "practice_area": practice_area,
        "court": court,
        "precedent_level": precedent_level,
        "appeal_subtype": appeal_subtype,
        "is_binding": is_binding,
        "subject_tag": subject_tag,
    }
    return await db.search_precedent_library_semantic(
        query_embedding=embedded_query,
        **filters,
        limit=limit,
        include_halachot=include_halachot,
    )
|
||||
234
mcp-server/src/legal_mcp/tools/precedent_library.py
Normal file
234
mcp-server/src/legal_mcp/tools/precedent_library.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""MCP tools for the External Precedent Library.
|
||||
|
||||
This is distinct from:
|
||||
|
||||
- ``precedents`` (case_precedents table) — chair-attached quotes scoped to
|
||||
a specific case section. Use ``precedent_search_library`` for that.
|
||||
- ``style_corpus`` (Daphna's prior decisions) — searched via
|
||||
``search_decisions`` for style/voice.
|
||||
|
||||
The precedent library is the **authoritative law** corpus: external court
|
||||
rulings and other appeals committees' decisions, with halachot extracted
|
||||
and reviewed by the chair.
|
||||
|
||||
All halachot enter as ``pending_review`` and are invisible to search until
|
||||
the chair approves them — per project review policy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db, precedent_library
|
||||
|
||||
|
||||
def _ok(payload) -> str:
|
||||
return json.dumps(payload, ensure_ascii=False, indent=2, default=str)
|
||||
|
||||
|
||||
def _err(msg: str) -> str:
|
||||
return json.dumps({"error": msg}, ensure_ascii=False)
|
||||
|
||||
|
||||
async def precedent_library_upload(
    file_path: str,
    citation: str,
    case_name: str = "",
    court: str = "",
    decision_date: str = "",
    source_type: str = "",
    precedent_level: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    is_binding: bool = True,
    headnote: str = "",
    summary: str = "",
) -> str:
    """Upload an external ruling to the library and extract its halachot.

    Args:
        file_path: Full path to a PDF/DOCX/RTF/TXT/MD file.
        citation: Citation of the ruling (required).
        case_name: Short name.
        court: Court (supreme / administrative / national appeals committee /
            district appeals committee).
        decision_date: ISO date (YYYY-MM-DD), optional.
        source_type: court_ruling / appeals_committee.
        precedent_level: Precedent level of the ruling.
        practice_area: rishuy_uvniya / betterment_levy / compensation_197.
        appeal_subtype: Appeal sub-type.
        subject_tags: Subject tags.
        is_binding: Whether the ruling is binding.
        headnote: Headnote text.
        summary: Summary text.

    Returns:
        JSON with the case_law_id, the chunk count and the number of halachot
        queued for approval, or a JSON ``error`` object on failure.
    """
    if citation.strip() == "":
        return _err("citation חובה")

    ingest_args = {
        "file_path": file_path,
        "citation": citation,
        "case_name": case_name,
        "court": court,
        # An empty date string means "no date supplied".
        "decision_date": decision_date or None,
        "source_type": source_type,
        "precedent_level": precedent_level,
        "practice_area": practice_area,
        "appeal_subtype": appeal_subtype,
        "subject_tags": subject_tags or [],
        "is_binding": is_binding,
        "headnote": headnote,
        "summary": summary,
    }
    try:
        outcome = await precedent_library.ingest_precedent(**ingest_args)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return _err(str(e))
    return _ok(outcome)
|
||||
|
||||
|
||||
async def precedent_library_list(
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    source_type: str = "",
    search: str = "",
    limit: int = 100,
) -> str:
    """List the rulings in the precedent library, with optional filters.

    All arguments are forwarded to ``precedent_library.list_precedents``.

    Returns:
        The matching rows serialized as JSON.
    """
    filters = {
        "practice_area": practice_area,
        "court": court,
        "precedent_level": precedent_level,
        "source_type": source_type,
        "search": search,
    }
    listing = await precedent_library.list_precedents(**filters, limit=limit)
    return _ok(listing)
|
||||
|
||||
|
||||
async def precedent_library_get(case_law_id: str) -> str:
    """Return one ruling with all of its halachot, pending ones included.

    Args:
        case_law_id: Id of the ruling, as a UUID string.

    Returns:
        The record as JSON, or a JSON ``error`` object when the id is
        malformed or no such ruling exists.
    """
    try:
        parsed_id = UUID(case_law_id)
    except ValueError:
        return _err("case_law_id לא תקין")

    found = await precedent_library.get_precedent(parsed_id)
    return _ok(found) if found else _err("פסיקה לא נמצאה")
|
||||
|
||||
|
||||
async def precedent_library_delete(case_law_id: str) -> str:
    """Delete a ruling from the library (cascades to chunks and halachot).

    Args:
        case_law_id: Id of the ruling, as a UUID string.

    Returns:
        JSON ``{"deleted": <result>, "case_law_id": <id>}``, or a JSON
        ``error`` object when the id is malformed.
    """
    try:
        parsed_id = UUID(case_law_id)
    except ValueError:
        return _err("case_law_id לא תקין")

    was_deleted = await precedent_library.delete_precedent(parsed_id)
    response = {"deleted": was_deleted, "case_law_id": case_law_id}
    return _ok(response)
|
||||
|
||||
|
||||
async def precedent_extract_halachot(case_law_id: str) -> str:
    """Re-run halacha extraction for an existing ruling.

    Previously extracted halachot of that ruling are deleted.

    Args:
        case_law_id: Id of the ruling, as a UUID string.

    Returns:
        The extraction result as JSON, or a JSON ``error`` object when the
        id is malformed or the extraction raised.
    """
    try:
        parsed_id = UUID(case_law_id)
    except ValueError:
        return _err("case_law_id לא תקין")

    try:
        outcome = await precedent_library.reextract_halachot(parsed_id)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return _err(str(e))
    else:
        return _ok(outcome)
|
||||
|
||||
|
||||
async def search_precedent_library(
    query: str,
    practice_area: str = "",
    court: str = "",
    precedent_level: str = "",
    appeal_subtype: str = "",
    is_binding: bool | None = None,
    subject_tag: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> str:
    """Semantic search over the authoritative precedent library.

    Returns mixed results: halachot (rule level, approved only) and text
    passages (passage level). Halachot get a slight ranking boost because
    they are already distilled.

    Args:
        query: Search query (Hebrew).
        practice_area: rishuy_uvniya / betterment_levy / compensation_197.
        court: Filter by court (substring).
        precedent_level: Filter by precedent level.
        appeal_subtype: Filter by appeal sub-type.
        is_binding: True/False (None = no filtering).
        subject_tag: Filter by subject tag.
        limit: Maximum number of results.
        include_halachot: Whether to include halachot (default: yes).

    Returns:
        A ranked JSON list whose items look like
        ``{"type": "halacha"|"passage", "score", ...}``; ``[]`` when the
        query has fewer than two non-blank characters.
    """
    trimmed = query.strip() if query else ""
    if len(trimmed) < 2:
        return json.dumps([], ensure_ascii=False)

    filters = {
        "practice_area": practice_area,
        "court": court,
        "precedent_level": precedent_level,
        "appeal_subtype": appeal_subtype,
        "is_binding": is_binding,
        "subject_tag": subject_tag,
    }
    hits = await precedent_library.search_library(
        query=trimmed,
        **filters,
        limit=limit,
        include_halachot=include_halachot,
    )
    return _ok(hits)
|
||||
|
||||
|
||||
async def halacha_review(
    halacha_id: str,
    status: str,
    reviewer: str = "דפנה",
    rule_statement: str = "",
    reasoning_summary: str = "",
    subject_tags: list[str] | None = None,
    practice_areas: list[str] | None = None,
) -> str:
    """Approve, reject or edit an automatically extracted halacha.

    Args:
        halacha_id: Id of the halacha, as a UUID string.
        status: pending_review / approved / rejected / published.
        reviewer: Name of the reviewer.
        rule_statement: Edited rule wording (empty = unchanged).
        reasoning_summary: Edited reasoning summary (empty = unchanged).
        subject_tags: Edited tags (None = unchanged).
        practice_areas: Edited practice areas (None = unchanged).

    Returns:
        The updated row as JSON, or a JSON ``error`` object for an invalid
        status, a malformed id, or an unknown halacha.
    """
    allowed_statuses = ("pending_review", "approved", "rejected", "published")
    if status not in allowed_statuses:
        return _err(
            "status לא חוקי. ערכים תקינים: pending_review / approved / rejected / published"
        )

    try:
        parsed_id = UUID(halacha_id)
    except ValueError:
        return _err("halacha_id לא תקין")

    # Empty strings are sent as None, which marks the field as unchanged.
    updated = await db.update_halacha(
        halacha_id=parsed_id,
        review_status=status,
        reviewer=reviewer,
        rule_statement=rule_statement if rule_statement else None,
        reasoning_summary=reasoning_summary if reasoning_summary else None,
        subject_tags=subject_tags,
        practice_areas=practice_areas,
    )
    if updated is not None:
        return _ok(updated)
    return _err("הלכה לא נמצאה")
|
||||
|
||||
|
||||
async def halachot_pending(limit: int = 100) -> str:
    """Return the queue of halachot awaiting review, as JSON.

    Args:
        limit: Maximum number of halachot to return.

    Returns:
        JSON list of halachot whose ``review_status`` is ``pending_review``.
    """
    queue = await db.list_halachot(review_status="pending_review", limit=limit)
    return _ok(queue)
|
||||
Reference in New Issue
Block a user