Add docs, scripts, skills, commands, and taskmaster config to repo
Includes: - docs/: architecture, block-schema, migration-plan, product-specification - scripts/: bidi_table, decompose-decisions, extract-claims, seed-knowledge, etc. - skill-legal-decision/: SKILL.md + references + block-schema - skill-legal-assistant/: SKILL.md - skill-legal-docx/: SKILL.md + references - .claude/commands/: bidi-table skill - .taskmaster/: task config + PRDs - .gitignore: exclude legacy/, kiryat-yearim/, node_modules/, memory/ Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
62
scripts/bidi_table.py
Normal file
62
scripts/bidi_table.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
"""BiDi-safe box-drawing table renderer for mixed Hebrew/English terminal output.
|
||||
|
||||
Uses LRM (Left-to-Right Mark, U+200E) before box-drawing characters to prevent
|
||||
the BiDi algorithm from breaking table alignment when Hebrew text is present.
|
||||
|
||||
Usage as module:
|
||||
from scripts.bidi_table import bidi_table
|
||||
print(bidi_table(['Col1', 'Col2'], [['val1', 'ערך2']]))
|
||||
|
||||
Usage from CLI:
|
||||
python3 scripts/bidi_table.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
LRM = "\u200E"  # Left-to-Right Mark — invisible, prevents BiDi reordering


def bidi_table(headers: list[str], rows: list[list[str]]) -> str:
    """Render a box-drawing table safe for mixed RTL/LTR terminal display.

    Args:
        headers: Column titles; their count fixes the number of columns.
        rows: Data rows. Cells beyond the header count are ignored, missing
            trailing cells are rendered empty, and non-string cells are
            converted with ``str()``.

    Returns:
        The table as a single newline-joined string. Every vertical bar on a
        content line is preceded by an LRM; horizontal rules carry no cell
        text and are emitted without marks.
    """
    import unicodedata  # local import keeps this block self-contained

    def display_width(text: str) -> int:
        """Return the number of terminal columns ``text`` occupies."""
        width = 0
        for ch in text:
            # Non-spacing / enclosing marks (e.g. Hebrew points) and format
            # characters (e.g. LRM/RLM) do not advance the cursor.
            if unicodedata.category(ch) in ("Mn", "Me", "Cf"):
                continue
            width += 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
        return width

    ncols = len(headers)
    header_cells = [str(h) for h in headers]
    # Normalise once so width calculation and rendering see the same text.
    body_rows = [[str(cell) for cell in row[:ncols]] for row in rows]

    # Each column is as wide as its widest cell, measured in display columns
    # rather than code points.
    col_widths = [display_width(h) for h in header_cells]
    for row in body_rows:
        for i, cell in enumerate(row):
            col_widths[i] = max(col_widths[i], display_width(cell))

    def hline(left: str, mid: str, right: str) -> str:
        """Build a horizontal rule with the given corner/junction glyphs."""
        return left + mid.join("─" * (w + 2) for w in col_widths) + right

    def dataline(cells: list[str]) -> str:
        """Build one content line, padding short rows with empty cells."""
        parts = []
        for i in range(ncols):
            cell = cells[i] if i < len(cells) else ""
            padding = " " * max(0, col_widths[i] - display_width(cell))
            parts.append(" " + cell + padding + " ")
        return LRM + "│" + (LRM + "│").join(parts) + LRM + "│"

    lines = [hline("┌", "┬", "┐"), dataline(header_cells), hline("├", "┼", "┤")]
    lines.extend(dataline(row) for row in body_rows)
    lines.append(hline("└", "┴", "┘"))
    return "\n".join(lines)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI demo: render a sample table that mixes ASCII file names with
    # Hebrew descriptions, to eyeball alignment in a terminal.
    demo_headers = ["File", "Description", "Model", "Step"]
    demo_rows = [
        ["claims_extractor.py", "חילוץ טענות מכתבי טענות", "Sonnet", "שלב 3 — הבא בתור"],
        ["brainstorm.py", "סיעור מוחות — כיווני נימוק", "Sonnet", "שלב 4"],
        ["block_writer.py", "כתיבת בלוקים של החלטה", "Sonnet/Opus", "שלב 5"],
        ["qa_validator.py", "בדיקת איכות QA", "Sonnet", "שלב 6"],
        ["style_analyzer.py", "ניתוח סגנון דפנה", "Opus", "חד-פעמי"],
        ["learning_loop.py", "למידה מהחלטה סופית", "Sonnet", "סוף תהליך"],
    ]
    print(bidi_table(demo_headers, demo_rows))
|
||||
289
scripts/decompose-decisions-v2.py
Normal file
289
scripts/decompose-decisions-v2.py
Normal file
@@ -0,0 +1,289 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Decompose final decisions into 12-block structure — V2 calibrated on הכט.
|
||||
|
||||
Key insight: DOCX extraction strips header blocks (א-ד). The real content
|
||||
starts at block ה (opening "לפנינו"). We identify blocks by known section
|
||||
headers and line-by-line analysis.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# The twelve decision blocks as (block_id, block_index, title, generation_type).
# block_index is the 1-based position in the list below.
BLOCK_DEFS = [
    (block_id, position, title, generation_type)
    for position, (block_id, title, generation_type) in enumerate(
        [
            ("block-alef", "כותרת מוסדית", "template-fill"),
            ("block-bet", "הרכב הוועדה", "template-fill"),
            ("block-gimel", "צדדים", "template-fill"),
            ("block-dalet", "כותרת החלטה", "template-fill"),
            ("block-he", "פתיחה", "paraphrase"),
            ("block-vav", "רקע עובדתי", "reproduction"),
            ("block-zayin", "טענות הצדדים", "paraphrase"),
            ("block-chet", "הליכים בפני ועדת הערר", "reproduction"),
            ("block-tet", "תכניות חלות", "guided-synthesis"),
            ("block-yod", "דיון והכרעה", "rhetorical-construction"),
            ("block-yod-alef", "סיכום", "paraphrase"),
            ("block-yod-bet", "חתימות", "template-fill"),
        ],
        start=1,
    )
]
|
||||
|
||||
|
||||
def find_line(lines: list[str], pattern: str, start: int = 0) -> int:
    """Return the index of the first line that the regex ``pattern`` matches.

    The pattern is applied with ``re.search`` to each line, scanning from
    index ``start`` to the end of ``lines``. Returns -1 when nothing matches.
    """
    matches = re.compile(pattern).search
    return next(
        (idx for idx in range(start, len(lines)) if matches(lines[idx])),
        -1,
    )
|
||||
|
||||
|
||||
def slice_text(lines: list[str], start: int, end: int) -> str:
    """Join ``lines[start:end]`` with newlines and strip the result.

    Returns an empty string when ``start`` is negative or the range is empty.
    """
    if 0 <= start < end:
        return "\n".join(lines[start:end]).strip()
    return ""
|
||||
|
||||
|
||||
def count_words(text: str) -> int:
    """Count whitespace-separated words; falsy input (``""``/``None``) gives 0."""
    if not text:
        return 0
    return len(text.split())
|
||||
|
||||
|
||||
def decompose(text: str) -> dict[str, str]:
    """Split the extracted text of a decision into the 12-block structure.

    Section boundaries are found by searching the text line by line for
    known Hebrew section headers. Two header styles are recognised:
    descriptive headers, and headers prefixed with a Hebrew letter followed
    by "." or ")".

    Args:
        text: Full extracted decision text. ``None`` or an empty string is
            treated as an empty document.

    Returns:
        A dict keyed by all twelve block ids, in block order. Blocks that
        could not be located map to ``""``. ``block-bet`` and
        ``block-gimel`` are always empty and ``block-dalet`` is always the
        fixed decision title.
    """
    lines = (text or "").split("\n")
    n = len(lines)
    blocks = {}

    # ── Locate the key section headers (-1 means "not found") ──
    opening = find_line(lines, r"^לפנינו\s|^בפנינו\s|^בפני\s*ועדת|^בפני\s*בקשה")

    claims = find_line(lines, r"תמצית\s*טענות|טענות\s*הצדדים|טענות\s*העוררי")
    if claims == -1:
        # Letter-prefixed style, e.g. a header of the form "<letter>. claims".
        claims = find_line(lines, r"^[א-ת][\.\)]\s*טענות")

    background = find_line(lines, r"^[א-ת][\.\)]\s*רקע\s*עובדתי")

    proceedings = find_line(lines, r"ההליכים\s*בפני|הליכים\s*בפני|הדיון\s*בפני\s*ועדת\s*הערר")
    if proceedings == -1:
        # Letter-prefixed procedural sections (clarifications / the proceeding).
        proceedings = find_line(lines, r"^[א-ת][\.\)]\s*הבהרות|^[א-ת][\.\)]\s*ההליך")

    plans = find_line(lines, r"תכניות\s*חלות|המסגרת\s*הנורמטיבית|הוראות\s*התכנית")

    discussion = find_line(lines, r"^דיון\s*והכרעה|^דיון$|^הכרעה$")
    if discussion == -1:
        discussion = find_line(lines, r"^[א-ת][\.\)]\s*דיון\s*והכרעה")

    summary = find_line(lines, r"^סיכום\s*$|^סוף\s*דבר\s*$")
    if summary == -1:
        summary = find_line(lines, r"^[א-ת][\.\)]\s*סיכום")

    signature = find_line(lines, r"^ניתנה?\s*(היום|פה\s*אחד|ביום)")

    # No explicit discussion header: fall back to the opening formula of the
    # discussion section, searched anywhere in a line.
    if discussion == -1:
        discussion = find_line(lines, r"לאחר\s*שבחנו\s*את\s*טענות")

    # ── Header blocks (1-4): everything before the opening line ──
    # The source docstring notes that DOCX extraction usually loses these,
    # so whatever precedes the opening is stored unsplit in the first block.
    blocks["block-alef"] = slice_text(lines, 0, opening) if opening >= 0 else ""
    blocks["block-bet"] = ""
    blocks["block-gimel"] = ""
    blocks["block-dalet"] = "החלטה"

    # ── Block 5, opening: the opening line plus up to four following
    #    non-blank lines, never running into the next section ──
    opening_end = opening + 1  # exclusive end; only meaningful if opening >= 0
    if opening >= 0:
        next_section = claims if claims > opening else discussion if discussion > opening else n
        for i in range(opening + 1, min(opening + 5, next_section)):
            if not lines[i].strip():
                break
            opening_end = i + 1
        blocks["block-he"] = slice_text(lines, opening, opening_end)
    else:
        blocks["block-he"] = ""

    # ── Block 6, background ──
    if background >= 0:
        # Explicit letter-prefixed background header: runs to the first of
        # claims / proceedings / discussion that follows it (in that priority).
        if claims > background:
            bg_end = claims
        elif proceedings > background:
            bg_end = proceedings
        elif discussion > background:
            bg_end = discussion
        else:
            bg_end = n
        blocks["block-vav"] = slice_text(lines, background, bg_end)
    elif opening >= 0 and claims > opening:
        # No explicit header: background is whatever lies between the end
        # of the opening block and the claims header.
        blocks["block-vav"] = slice_text(lines, opening_end, claims)
    elif opening >= 0 and discussion > opening:
        # Same, bounded by the discussion header. Starting at opening_end
        # (not opening + 1) keeps multi-line openings out of the background.
        blocks["block-vav"] = slice_text(lines, opening_end, discussion)
    else:
        blocks["block-vav"] = ""

    # ── Block 7, claims: from the claims header to the nearest later section ──
    # n is always greater than any found index, so min() never sees an
    # empty sequence.
    if claims >= 0:
        claims_end = min(
            x for x in [proceedings, plans, discussion, summary, n]
            if x > claims
        )
        blocks["block-zayin"] = slice_text(lines, claims, claims_end)
    else:
        blocks["block-zayin"] = ""

    # ── Block 8, proceedings (optional) ──
    if proceedings >= 0:
        proc_end = min(
            x for x in [plans, discussion, summary, n]
            if x > proceedings
        )
        blocks["block-chet"] = slice_text(lines, proceedings, proc_end)
    else:
        blocks["block-chet"] = ""

    # ── Block 9, plans (optional): only when it precedes the discussion ──
    if plans >= 0 and (discussion == -1 or plans < discussion):
        plans_end = min(
            x for x in [discussion, summary, n]
            if x > plans
        )
        blocks["block-tet"] = slice_text(lines, plans, plans_end)
    else:
        blocks["block-tet"] = ""

    # ── Block 10, discussion: up to the summary, else the signature ──
    if discussion >= 0:
        if summary > discussion:
            disc_end = summary
        elif signature > discussion:
            disc_end = signature
        else:
            disc_end = n
        blocks["block-yod"] = slice_text(lines, discussion, disc_end)
    else:
        blocks["block-yod"] = ""

    # ── Block 11, summary ──
    if summary >= 0:
        summ_end = signature if signature > summary else n
        blocks["block-yod-alef"] = slice_text(lines, summary, summ_end)
    else:
        blocks["block-yod-alef"] = ""

    # ── Block 12, signatures: from the signature line to the end ──
    if signature >= 0:
        blocks["block-yod-bet"] = slice_text(lines, signature, n)
    else:
        blocks["block-yod-bet"] = ""

    return blocks
|
||||
|
||||
|
||||
async def main():
    """Re-decompose every final decision and store its blocks in the database.

    For each decision with status 'final' that has a document of type
    'decision', the extracted text is split with ``decompose``, the
    decision's existing rows in ``decision_blocks`` are deleted and replaced,
    and the decision's word/paragraph totals are updated. Progress and
    per-block statistics are printed. The pool is closed even if a step fails.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            decisions = await conn.fetch(
                """SELECT d.id as decision_id, c.case_number, c.title,
                          doc.extracted_text
                   FROM decisions d
                   JOIN cases c ON c.id = d.case_id
                   JOIN documents doc ON doc.case_id = d.case_id AND doc.doc_type = 'decision'
                   WHERE d.status = 'final'
                   ORDER BY c.case_number"""
            )

        for dec in decisions:
            decision_id = dec["decision_id"]
            case_number = dec["case_number"]
            # Guard against a NULL extracted_text (column nullability is not
            # visible from this script).
            text = dec["extracted_text"] or ""
            total_words = count_words(text)

            print(f"\n{'='*60}")
            print(f"מפרק: {case_number} — {dec['title']}")
            print(f"סה\"כ מילים: {total_words}")
            print(f"{'='*60}")

            parsed = decompose(text)

            async with pool.acquire() as conn:
                # NOTE(review): the delete and the inserts below are not
                # wrapped in an explicit transaction here — confirm whether a
                # failure mid-way can leave a decision without blocks.
                await conn.execute(
                    "DELETE FROM decision_blocks WHERE decision_id = $1", decision_id
                )

                total_parsed_words = 0
                for block_id, block_index, title, gen_type in BLOCK_DEFS:
                    content = parsed.get(block_id, "")
                    wc = count_words(content)
                    # Share of the whole document, in percent.
                    weight = round(wc / total_words * 100, 1) if total_words > 0 and wc > 0 else 0
                    status = "final" if wc > 0 else "empty"
                    total_parsed_words += wc

                    await conn.execute(
                        """INSERT INTO decision_blocks
                               (decision_id, block_id, block_index, title, content,
                                word_count, weight_percent, generation_type, status)
                           VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
                        decision_id, block_id, block_index, title,
                        content, wc, weight, gen_type, status,
                    )

                    marker = "✅" if wc > 0 else "⬜"
                    print(f"  {marker} {block_id:18s} | {title:25s} | {wc:5d} מילים | {weight:5.1f}%")

                # total_paragraphs counts only discussion-block lines longer
                # than 20 characters after stripping.
                disc_paras = len([
                    p for p in parsed.get("block-yod", "").split("\n")
                    if p.strip() and len(p.strip()) > 20
                ])
                await conn.execute(
                    "UPDATE decisions SET total_words = $1, total_paragraphs = $2, updated_at = now() WHERE id = $3",
                    total_words, disc_paras, decision_id,
                )

            coverage = round(total_parsed_words / total_words * 100, 1) if total_words > 0 else 0
            print(f"  --- כיסוי: {total_parsed_words}/{total_words} מילים ({coverage}%)")

        # Aggregate statistics over all non-empty blocks.
        async with pool.acquire() as conn:
            stats = await conn.fetch(
                """SELECT block_id, count(*) as decisions,
                          avg(word_count) as avg_words,
                          avg(weight_percent) as avg_weight
                   FROM decision_blocks
                   WHERE word_count > 0
                   GROUP BY block_id ORDER BY block_id"""
            )

        print(f"\n{'='*60}")
        print("סטטיסטיקה לפי בלוק (רק בלוקים עם תוכן):")
        for s in stats:
            print(f"  {s['block_id']:18s} | {s['decisions']} החלטות | ממוצע {s['avg_words']:.0f} מילים | {s['avg_weight']:.1f}%")
    finally:
        await close_pool()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
323
scripts/decompose-decisions.py
Normal file
323
scripts/decompose-decisions.py
Normal file
@@ -0,0 +1,323 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Decompose 6 final decisions into 12-block structure.
|
||||
|
||||
Uses heuristic parsing based on known section headers in Dafna's decisions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Block definitions with detection patterns
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
# Metadata for the twelve decision blocks. Each entry is a dict with the keys
# block_id, block_index (1-based position), title and generation_type.
BLOCKS = [
    {
        "block_id": block_id,
        "block_index": position,
        "title": title,
        "generation_type": generation_type,
    }
    for position, (block_id, title, generation_type) in enumerate(
        [
            ("block-alef", "כותרת מוסדית", "template-fill"),
            ("block-bet", "הרכב הוועדה", "template-fill"),
            ("block-gimel", "צדדים", "template-fill"),
            ("block-dalet", "כותרת החלטה", "template-fill"),
            ("block-he", "פתיחה", "paraphrase"),
            ("block-vav", "רקע עובדתי", "reproduction"),
            ("block-zayin", "טענות הצדדים", "paraphrase"),
            ("block-chet", "הליכים בפני ועדת הערר", "reproduction"),
            ("block-tet", "תכניות חלות", "guided-synthesis"),
            ("block-yod", "דיון והכרעה", "rhetorical-construction"),
            ("block-yod-alef", "סיכום", "paraphrase"),
            ("block-yod-bet", "חתימות", "template-fill"),
        ],
        start=1,
    )
]
|
||||
|
||||
# Section header patterns (Hebrew)
|
||||
# Compiled regexes keyed by section name. They are searched against the WHOLE
# document text, so any pattern that uses ^ / $ to mean "line start/end" must
# be compiled with re.MULTILINE.
SECTION_PATTERNS = {
    "claims": re.compile(r"תמצית\s*טענות\s*הצדדים|טענות\s*הצדדים|טענות\s*העוררי"),
    "proceedings": re.compile(r"ההליכים\s*בפני\s*ועדת\s*הערר|הליכים\s*בפני\s*הוועדה|הדיון\s*בפני\s*ועדת\s*הערר"),
    "plans": re.compile(r"תכניות\s*חלות|המסגרת\s*התכנונית|הוראות\s*התכנית"),
    # NOTE(review): the two single-word alternatives are unanchored, so they
    # match inside longer phrases — including the third "proceedings"
    # alternative above, which contains the same word. Consider anchoring.
    "discussion": re.compile(r"דיון\s*והכרעה|דיון|הכרעה"),
    "summary": re.compile(r"^סיכום$|^סוף\s*דבר$", re.MULTILINE),
    "appellant_claims": re.compile(r"טענות\s*העוררי|טענות\s*העורר"),
    "respondent_claims": re.compile(r"עמדת\s*הוועדה\s*המקומית|תגובת\s*המשיבה|עמדת\s*המשיב"),
    "permit_applicant": re.compile(r"עמדת\s*מבקש|עמדת\s*מגיש|עמדת\s*היזם"),
    "panel": re.compile(r"בפני[:\s]|יו\"ר"),
    "parties_vs": re.compile(r"\s*נגד\s*"),
    "decision_title": re.compile(r"^החלטה$", re.MULTILINE),
    # MULTILINE is required here: without it ^ only matches at offset 0 of
    # the document, so an opening line after a header was never found.
    "opening": re.compile(r"^לפנינו\s|^בפנינו\s", re.MULTILINE),
    "signature": re.compile(r"ניתנה?\s*(היום|פה\s*אחד|ביום)|חתימ"),
}
|
||||
|
||||
|
||||
def find_section_start(text: str, pattern: re.Pattern) -> int:
    """Return the character offset of the first match of ``pattern`` in ``text``.

    Returns -1 when the pattern does not match. Note that 0 is a valid
    offset (a match at the very start of the text).
    """
    found = pattern.search(text)
    if found is None:
        return -1
    return found.start()
|
||||
|
||||
|
||||
def decompose_decision(text: str) -> list[dict]:
    """Parse decision text into blocks based on section headers.

    Each section's start offset is located with ``find_section_start`` and
    the matching entry of ``SECTION_PATTERNS``; a block runs from its own
    offset to the first later section in a fixed priority order.

    Args:
        text: Full extracted decision text.

    Returns:
        A list of ``{"block_id": ..., "content": ...}`` dicts, one per block
        id in block order. Sections that were not found have empty content;
        ``block-dalet`` always carries the fixed decision title.
    """
    total_len = len(text)

    def found(pos: int) -> bool:
        """True when ``pos`` is a real match offset (0 is valid, -1 is not)."""
        return pos >= 0

    def next_boundary(start: int, *candidates: int) -> int:
        """First candidate (in priority order) after ``start``, else end of text."""
        for candidate in candidates:
            if candidate > start:
                return candidate
        return total_len

    # Locate the key section boundaries (character offsets, -1 = not found).
    pos_claims = find_section_start(text, SECTION_PATTERNS["claims"])
    pos_proceedings = find_section_start(text, SECTION_PATTERNS["proceedings"])
    pos_plans = find_section_start(text, SECTION_PATTERNS["plans"])
    pos_discussion = find_section_start(text, SECTION_PATTERNS["discussion"])
    pos_summary = find_section_start(text, SECTION_PATTERNS["summary"])
    pos_signature = find_section_start(text, SECTION_PATTERNS["signature"])
    pos_opening = find_section_start(text, SECTION_PATTERNS["opening"])
    pos_decision_title = find_section_start(text, SECTION_PATTERNS["decision_title"])
    pos_panel = find_section_start(text, SECTION_PATTERNS["panel"])
    pos_parties = find_section_start(text, SECTION_PATTERNS["parties_vs"])

    blocks = []

    # ── Blocks 1-4: header area, i.e. everything before the opening ──
    # Without an opening or claims marker, fall back to the first 500 chars.
    if found(pos_opening):
        header_end = pos_opening
    elif found(pos_claims):
        header_end = pos_claims
    else:
        header_end = 500
    header_text = text[:header_end].strip()

    # Try to split the header into institutional header, panel, parties, title.
    if 0 <= pos_panel < header_end:
        blocks.append({"block_id": "block-alef", "content": text[:pos_panel].strip()})

        if 0 <= pos_parties < header_end:
            blocks.append({"block_id": "block-bet", "content": text[pos_panel:pos_parties].strip()})
            if 0 <= pos_decision_title < header_end:
                parties_end = pos_decision_title
            else:
                parties_end = header_end
            blocks.append({"block_id": "block-gimel", "content": text[pos_parties:parties_end].strip()})
            blocks.append({"block_id": "block-dalet", "content": "החלטה"})
        else:
            blocks.append({"block_id": "block-bet", "content": text[pos_panel:header_end].strip()})
            blocks.append({"block_id": "block-gimel", "content": ""})
            blocks.append({"block_id": "block-dalet", "content": "החלטה"})
    else:
        # Can't split — put everything in the first block.
        blocks.append({"block_id": "block-alef", "content": header_text})
        blocks.append({"block_id": "block-bet", "content": ""})
        blocks.append({"block_id": "block-gimel", "content": ""})
        blocks.append({"block_id": "block-dalet", "content": "החלטה"})

    # ── Block 5 (opening) and block 6 (background) ──
    if found(pos_opening):
        opening_end = next_boundary(pos_opening, pos_claims, pos_discussion)
        # The opening is expected to be short: look at no more than 1000
        # characters and cut at the second newline past offset 50.
        opening_text = text[pos_opening:min(pos_opening + 1000, opening_end)].strip()
        para_breaks = [i for i, c in enumerate(opening_text) if c == '\n' and i > 50]
        if len(para_breaks) >= 2:
            opening_text = opening_text[:para_breaks[1]].strip()
        blocks.append({"block_id": "block-he", "content": opening_text})

        # Background: whatever lies between the opening text and the claims.
        if pos_claims > pos_opening:
            bg_start = pos_opening + len(opening_text)
            blocks.append({"block_id": "block-vav", "content": text[bg_start:pos_claims].strip()})
        else:
            blocks.append({"block_id": "block-vav", "content": ""})
    else:
        blocks.append({"block_id": "block-he", "content": ""})
        blocks.append({"block_id": "block-vav", "content": ""})

    # ── Block 7: claims ──
    if found(pos_claims):
        claims_end = next_boundary(pos_claims, pos_proceedings, pos_discussion, pos_summary)
        blocks.append({"block_id": "block-zayin", "content": text[pos_claims:claims_end].strip()})
    else:
        blocks.append({"block_id": "block-zayin", "content": ""})

    # ── Block 8: proceedings (optional) ──
    if found(pos_proceedings):
        proc_end = next_boundary(pos_proceedings, pos_plans, pos_discussion, pos_summary)
        blocks.append({"block_id": "block-chet", "content": text[pos_proceedings:proc_end].strip()})
    else:
        blocks.append({"block_id": "block-chet", "content": ""})

    # ── Block 9: plans (optional), only when it precedes the discussion ──
    plans_limit = pos_discussion if found(pos_discussion) else total_len
    if found(pos_plans) and pos_plans < plans_limit:
        plans_end = next_boundary(pos_plans, pos_discussion, pos_summary)
        blocks.append({"block_id": "block-tet", "content": text[pos_plans:plans_end].strip()})
    else:
        blocks.append({"block_id": "block-tet", "content": ""})

    # ── Block 10: discussion ──
    if found(pos_discussion):
        disc_end = next_boundary(pos_discussion, pos_summary, pos_signature)
        blocks.append({"block_id": "block-yod", "content": text[pos_discussion:disc_end].strip()})
    else:
        blocks.append({"block_id": "block-yod", "content": ""})

    # ── Block 11: summary ──
    if found(pos_summary):
        summ_end = next_boundary(pos_summary, pos_signature)
        blocks.append({"block_id": "block-yod-alef", "content": text[pos_summary:summ_end].strip()})
    else:
        blocks.append({"block_id": "block-yod-alef", "content": ""})

    # ── Block 12: signatures, to the end of the text ──
    if found(pos_signature):
        blocks.append({"block_id": "block-yod-bet", "content": text[pos_signature:].strip()})
    else:
        blocks.append({"block_id": "block-yod-bet", "content": ""})

    return blocks
|
||||
|
||||
|
||||
async def main():
    """Decompose every final decision and store its blocks in the database.

    For each decision with status 'final' that has a document of type
    'decision', the text is split with ``decompose_decision``, merged with
    the metadata in ``BLOCKS``, printed as a summary and written to
    ``decision_blocks`` (replacing the decision's existing rows). The pool
    is closed even if a step fails.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            decisions = await conn.fetch(
                """SELECT d.id as decision_id, c.case_number, c.title, d.total_words,
                          doc.extracted_text
                   FROM decisions d
                   JOIN cases c ON c.id = d.case_id
                   JOIN documents doc ON doc.case_id = d.case_id AND doc.doc_type = 'decision'
                   WHERE d.status = 'final'
                   ORDER BY c.case_number"""
            )

        for dec in decisions:
            decision_id = dec["decision_id"]
            case_number = dec["case_number"]
            # Guard against a NULL extracted_text (column nullability is not
            # visible from this script).
            text = dec["extracted_text"] or ""
            total_words = len(text.split())

            print(f"\n{'='*60}")
            print(f"מפרק: {case_number} — {dec['title']}")
            print(f"{'='*60}")

            # Decompose
            blocks = decompose_decision(text)

            # Merge parsed content with the static block metadata.
            block_data = []
            for block_def in BLOCKS:
                matching = [b for b in blocks if b["block_id"] == block_def["block_id"]]
                content = matching[0]["content"] if matching else ""
                word_count = len(content.split()) if content else 0
                # Share of the whole document, in percent.
                weight = round((word_count / total_words * 100), 2) if total_words > 0 and word_count > 0 else 0

                block_data.append({
                    **block_def,
                    "content": content,
                    "word_count": word_count,
                    "weight_percent": weight,
                    "status": "final" if content else "empty",
                })

            # Print summary
            for b in block_data:
                status = "✅" if b["word_count"] > 0 else "⬜"
                print(f"  {status} {b['block_id']:18s} | {b['title']:25s} | {b['word_count']:5d} מילים | {b['weight_percent']:5.1f}%")

            # Store in DB
            async with pool.acquire() as conn:
                # NOTE(review): the delete and the inserts below are not
                # wrapped in an explicit transaction here — confirm whether a
                # failure mid-way can leave a decision without blocks.
                await conn.execute(
                    "DELETE FROM decision_blocks WHERE decision_id = $1", decision_id
                )

                for b in block_data:
                    await conn.execute(
                        """INSERT INTO decision_blocks
                               (decision_id, block_id, block_index, title, content,
                                word_count, weight_percent, generation_type, status)
                           VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
                        decision_id,
                        b["block_id"], b["block_index"], b["title"],
                        b["content"], b["word_count"], b["weight_percent"],
                        b["generation_type"], b["status"],
                    )

                # total_paragraphs counts discussion-block lines longer than
                # 20 characters; it is left untouched when that block is empty.
                discussion = [b for b in block_data if b["block_id"] == "block-yod"][0]
                if discussion["content"]:
                    paragraphs = [p.strip() for p in discussion["content"].split("\n") if p.strip() and len(p.strip()) > 20]
                    await conn.execute(
                        "UPDATE decisions SET total_paragraphs = $1 WHERE id = $2",
                        len(paragraphs), decision_id,
                    )

        # Final summary
        async with pool.acquire() as conn:
            block_count = await conn.fetchval("SELECT count(*) FROM decision_blocks")
            non_empty = await conn.fetchval("SELECT count(*) FROM decision_blocks WHERE status = 'final'")
    finally:
        await close_pool()

    print(f"\n{'='*60}")
    print(f"✅ סה\"כ בלוקים: {block_count} ({non_empty} עם תוכן)")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
139
scripts/export-decision-docx.py
Normal file
139
scripts/export-decision-docx.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Export a decision from DB to DOCX using the CJS template generator.
|
||||
|
||||
Usage: python export-decision-docx.py <case_number> [output.docx]
|
||||
|
||||
Pulls decision blocks from DB, generates structure JSON,
|
||||
invokes create-decision-structure.cjs to produce DOCX.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
CJS_SCRIPT = Path(__file__).parent.parent / "skill-legal-decision" / "scripts" / "create-decision-structure.cjs"
|
||||
|
||||
|
||||
def block_id_to_hebrew(block_id: str) -> str:
    """Return the Hebrew letter label for ``block_id``, or ``""`` if unknown."""
    letters_by_block = dict((
        ("block-alef", "א"),
        ("block-bet", "ב"),
        ("block-gimel", "ג"),
        ("block-dalet", "ד"),
        ("block-he", "ה"),
        ("block-vav", "ו"),
        ("block-zayin", "ז"),
        ("block-chet", "ח"),
        ("block-tet", "ט"),
        ("block-yod", "י"),
        ("block-yod-alef", "יא"),
        ("block-yod-bet", "יב"),
    ))
    try:
        return letters_by_block[block_id]
    except KeyError:
        return ""
|
||||
|
||||
|
||||
async def main():
    """Export the final decision of one case to DOCX via the CJS generator.

    Reads the case number (and optional output path) from ``sys.argv``,
    loads the case, its final decision and the decision's blocks from the
    database, writes them as a structure JSON next to the output path, and
    runs ``CJS_SCRIPT`` with node to produce the DOCX.

    Exits with status 1 on bad usage, unknown case, missing final decision,
    or a failing generator run. The JSON file is kept in the last case.
    """
    if len(sys.argv) < 2:
        print("שימוש: python export-decision-docx.py <מספר_תיק> [output.docx]")
        sys.exit(1)

    case_number = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else f"החלטה-{case_number}.docx"

    await init_schema()
    pool = await get_pool()

    # try/finally so the pool is also closed on the early sys.exit() paths.
    try:
        async with pool.acquire() as conn:
            # Get case info
            case = await conn.fetchrow(
                "SELECT * FROM cases WHERE case_number = $1", case_number
            )
            if not case:
                print(f"תיק {case_number} לא נמצא")
                sys.exit(1)

            # Get decision
            decision = await conn.fetchrow(
                "SELECT * FROM decisions WHERE case_id = $1 AND status = 'final'",
                case["id"],
            )
            if not decision:
                print(f"אין החלטה סופית לתיק {case_number}")
                sys.exit(1)

            # Get blocks
            blocks = await conn.fetch(
                """SELECT block_id, block_index, title, content, word_count
                   FROM decision_blocks
                   WHERE decision_id = $1
                   ORDER BY block_index""",
                decision["id"],
            )
    finally:
        await close_pool()

    # Party lists may arrive as a JSON string or an already-decoded value;
    # a missing value is treated as an empty list.
    appellants = json.loads(case["appellants"]) if isinstance(case["appellants"], str) else case["appellants"]
    respondents = json.loads(case["respondents"]) if isinstance(case["respondents"], str) else case["respondents"]
    appellants = appellants or []
    respondents = respondents or []

    # Build structure JSON for CJS script
    structure = {
        "metadata": {
            "case_number": case["case_number"],
            "title": case["title"],
            "subject": case["subject"],
            "property_address": case["property_address"],
            "committee": case["committee_type"],
            "outcome": decision["outcome"] or "",
            "decision_date": str(decision["decision_date"]) if decision["decision_date"] else "",
            "author": decision["author"],
        },
        "parties": {
            "appellants": [{"name": a} for a in appellants],
            "respondents": [{"name": r} for r in respondents],
        },
        "blocks": [],
    }

    header_block_ids = ("block-alef", "block-bet", "block-gimel", "block-dalet")
    for block in blocks:
        content = block["content"] or ""
        # Skip empty header blocks
        if block["block_id"] in header_block_ids and not content:
            continue

        # One paragraph per non-blank line.
        paragraphs = [p.strip() for p in content.split("\n") if p.strip()]

        structure["blocks"].append({
            "id": block["block_id"],
            "index": block["block_index"],
            "title": block["title"],
            "hebrew_letter": block_id_to_hebrew(block["block_id"]),
            "word_count": block["word_count"],
            "paragraphs": paragraphs,
        })

    # Write JSON (absolute paths, because node runs in the script's directory)
    output_abs = Path(output_path).resolve()
    json_path = output_abs.with_suffix(".json")
    json_path.parent.mkdir(parents=True, exist_ok=True)

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(structure, f, ensure_ascii=False, indent=2)
    print(f"JSON נוצר: {json_path}")

    # Run CJS script with absolute paths
    result = subprocess.run(
        ["node", str(CJS_SCRIPT), str(json_path), str(output_abs)],
        capture_output=True, text=True,
        cwd=str(CJS_SCRIPT.parent),
    )

    if result.returncode == 0:
        print(f"✅ DOCX נוצר: {output_path}")
    else:
        print("❌ שגיאה ביצירת DOCX:")
        print(result.stderr)
        # JSON is still available for manual processing
        print(f"ה-JSON זמין: {json_path}")
        # Report the failure to the calling shell instead of exiting 0.
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
134
scripts/extract-citations.py
Normal file
134
scripts/extract-citations.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract case law citations from block-yod and link to case_law table."""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
# Patterns for Israeli case law citations. They are tried in order and may
# overlap (the last two both match a plain "ערר <number>"); duplicates are
# removed by extract_citations_from_text.
CITATION_PATTERNS = [
    # Court prefix (עע"מ, בג"ץ, ע"א, etc.) followed by a case number
    re.compile(r'(עע"מ|בג"ץ|ע"א|בר"ם|עת"מ|עמ"נ|ע"ע|רע"א|דנ"א|בש"א)\s*(\d[\d/\-]+)'),
    # ערר with an optional (possibly parenthesised) district, then a number
    re.compile(r'ערר\s*\(?\s*(?:מרכז|ירושלים|חי\'?|ת"א|דרום|צפון)?\s*\)?\s*(\d[\d/\-]+)'),
    # ערר without district
    re.compile(r'ערר\s+(\d{3,5}[\-/]\d{2,4})'),
]


def extract_citations_from_text(
    text: str,
    *,
    context_before: int = 50,
    context_after: int = 100,
) -> list[dict]:
    """Find all case law citations in text.

    Args:
        text: Text to scan. ``None`` or an empty string yields no citations.
        context_before: Number of characters kept before each match.
        context_after: Number of characters kept after each match.

    Returns:
        One dict per distinct citation, grouped by pattern (in
        CITATION_PATTERNS order) and in document order within a pattern.
        Each dict has ``citation_text`` (the matched text without a trailing
        ``-`` or ``/``) and ``context`` (the surrounding text with newlines
        replaced by spaces). A citation that occurs several times is reported
        once, with the context of its first occurrence.
    """
    if not text:
        return []

    citations = []
    seen = set()

    for pattern in CITATION_PATTERNS:
        for match in pattern.finditer(text):
            # The greedy number class can swallow a separator that follows the
            # number ("ערר 8007-24-..."); dropping it lets the overlapping
            # patterns agree on one spelling so the duplicate is filtered out.
            citation_text = match.group(0).rstrip("/-")
            if citation_text in seen:
                continue
            seen.add(citation_text)

            # Surrounding context, clamped to the bounds of the text
            start = max(0, match.start() - context_before)
            end = min(len(text), match.end() + context_after)

            citations.append({
                "citation_text": citation_text,
                "context": text[start:end].replace("\n", " "),
            })

    return citations
|
||||
|
||||
|
||||
def _mentions_case_number(needle: str, haystack: str) -> bool:
    """Return True when ``needle`` occurs in ``haystack`` with no digit on either side.

    A plain substring test would accept "3975/22" inside "13975/22".
    """
    pattern = r"(?<!\d)" + re.escape(needle) + r"(?!\d)"
    return re.search(pattern, haystack) is not None


def _find_case_law_id(citation_text: str, case_law_map: dict):
    """Return the id of the first case_law key that matches the citation, or None.

    A key matches when it is contained in the citation or the citation is
    contained in the key, in both cases on whole-number boundaries. Empty keys
    are ignored because they would match every citation.
    """
    for key, cl_id in case_law_map.items():
        if not key:
            continue
        if _mentions_case_number(key, citation_text) or _mentions_case_number(citation_text, key):
            return cl_id
    return None


async def main():
    """Extract citations from every block-yod and link them to case_law rows.

    For each non-empty 'block-yod' decision block, citations are extracted
    with extract_citations_from_text and matched against case_law.case_number.
    A matched citation gets a case_law_citations row (citation_type 'support')
    unless one already exists for that (case_law_id, decision_id) pair.
    Progress and a final summary are printed to stdout.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            # Get all block-yod content with decision info
            blocks = await conn.fetch(
                """SELECT db.content, d.id as decision_id, c.case_number
                   FROM decision_blocks db
                   JOIN decisions d ON d.id = db.decision_id
                   JOIN cases c ON c.id = d.case_id
                   WHERE db.block_id = 'block-yod' AND db.word_count > 0
                   ORDER BY c.case_number"""
            )

            # Get existing case_law for matching
            case_laws = await conn.fetch("SELECT id, case_number, case_name FROM case_law")

        case_law_map = {}
        for cl in case_laws:
            number = cl["case_number"]
            if not number:
                # Nothing to match on; also avoids calling .split() on NULL
                continue
            # Index by the full case number
            case_law_map[number] = cl["id"]
            # Also index by short number (e.g., "3975/22" from "עע"מ 3975/22")
            parts = number.split()
            if len(parts) > 1:
                case_law_map[parts[-1]] = cl["id"]

        total_citations = 0
        total_linked = 0

        for block in blocks:
            case_number = block["case_number"]
            decision_id = block["decision_id"]

            citations = extract_citations_from_text(block["content"])
            if not citations:
                continue

            print(f"\n{case_number}: {len(citations)} ציטוטים נמצאו")

            async with pool.acquire() as conn:
                for cit in citations:
                    total_citations += 1

                    # Try to match to case_law table
                    case_law_id = _find_case_law_id(cit["citation_text"], case_law_map)

                    if case_law_id:
                        # Insert only when this decision is not linked to the case law yet
                        existing = await conn.fetchval(
                            """SELECT id FROM case_law_citations
                               WHERE case_law_id = $1 AND decision_id = $2""",
                            case_law_id, decision_id,
                        )
                        if not existing:
                            await conn.execute(
                                """INSERT INTO case_law_citations
                                   (case_law_id, decision_id, citation_type, context_text)
                                   VALUES ($1, $2, 'support', $3)""",
                                case_law_id, decision_id, cit["context"],
                            )
                        # Counted as linked whether the row is new or already existed
                        total_linked += 1
                        print(f" ✅ {cit['citation_text'][:40]} → קושר לפסיקה")
                    else:
                        print(f" ⬜ {cit['citation_text'][:40]} — לא נמצא ב-DB")

        # Summary
        async with pool.acquire() as conn:
            total_in_db = await conn.fetchval("SELECT count(*) FROM case_law_citations")
    finally:
        # Release the pool even when a query or the extraction fails
        await close_pool()

    print(f"\n{'='*50}")
    print(f"סה\"כ ציטוטים שנמצאו: {total_citations}")
    print(f"סה\"כ קושרו לפסיקה ב-DB: {total_linked}")
    print(f"סה\"כ ב-case_law_citations: {total_in_db}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
228
scripts/extract-claims.py
Normal file
228
scripts/extract-claims.py
Normal file
@@ -0,0 +1,228 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract individual claims from block-zayin of each decision.
|
||||
|
||||
Identifies party sub-sections and individual claims (paragraphs).
|
||||
Stores in the claims table with party_role classification.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# Party role detection patterns
|
||||
PARTY_PATTERNS = [
|
||||
# Appellants
|
||||
(r"טענות\s*העוררי[םן]|טענות\s*העורר\b|טענות\s*המבקש|טענות\s*המערער", "appellant"),
|
||||
# Respondent - local committee
|
||||
(r"עמדת\s*הוועדה\s*המקומית|עמדת\s*המשיבה|טענות\s*המשיבה|תגובת\s*המשיבה|הוועדה\s*המקומית$", "committee"),
|
||||
# Respondent - general
|
||||
(r"עמדת\s*המשיבי[םן]|עמדת\s*המשיב\b|טענות\s*המשיבי[םן]|טענות\s*המשיב\b", "respondent"),
|
||||
# Permit applicant
|
||||
(r"מבקשי\s*ההיתר|עמדת\s*מבקש|עמדת\s*היזם|מגישי\s*התכנית", "permit_applicant"),
|
||||
# Appraiser clarifications (היטל השבחה)
|
||||
(r"הבהרות\s*השמא|התייחסות\s*הצדדים", "appraiser"),
|
||||
]
|
||||
|
||||
|
||||
def detect_party_role(line: str) -> str | None:
|
||||
"""Detect if a line is a party section header. Returns role or None."""
|
||||
for pattern, role in PARTY_PATTERNS:
|
||||
if re.search(pattern, line):
|
||||
return role
|
||||
return None
|
||||
|
||||
|
||||
def is_section_header(line: str) -> bool:
    """Tell whether a line is a section or sub-section header rather than a claim.

    Only lines shorter than 50 characters (after stripping) qualify. Such a
    line is a header when it names a party (see detect_party_role) or starts
    with one of the generic header forms listed below.
    """
    candidate = line.strip()
    if not candidate or len(candidate) >= 50:
        return False

    if detect_party_role(candidate) is not None:
        return True

    generic_header_patterns = (
        r"^תמצית\s*טענות",
        r"^[א-ת][\.\)]\s*טענות",
        r"^[א-ת][\.\)]\s*כללי",
        r"^\d+\.\s*$",  # just a number
    )
    return any(re.match(regex, candidate) for regex in generic_header_patterns)
|
||||
|
||||
|
||||
def is_numbered_sub_header(line: str) -> bool:
    """Tell whether a line is a short numbered topic header (e.g., '2. שיעור ההפקעה').

    After stripping, the line must be a number, a dot, whitespace, and then
    4 to 41 further characters, the first of which is not whitespace.
    """
    candidate = line.strip()
    found = re.match(r"^\d+\.\s+\S.{3,40}$", candidate)
    return found is not None
|
||||
|
||||
|
||||
def extract_claims_from_block(text: str) -> list[dict]:
    """Extract individual claims grouped by party from block-zayin text.

    The text is processed line by line; blank lines are ignored.

    * A line of at most 8 words that detect_party_role recognises switches the
      current party role and is not itself a claim.
    * A line accepted by is_section_header is dropped.
    * Every other line, numbered sub-headers included, is one candidate claim.

    A candidate is kept only when it is longer than 30 characters.

    Args:
        text: Block content. ``None`` or an empty string yields no claims.

    Returns:
        Dicts with ``party_role``, ``claim_text`` and ``claim_index``.
        ``claim_index`` numbers the kept claims consecutively from 0. Claims
        that appear before any party header are attributed to "appellant".
    """
    claims = []
    if not text:
        return claims

    min_claim_chars = 30
    current_role = "appellant"  # default if no header found
    pending = None  # candidate claim not yet emitted

    def flush_pending() -> None:
        """Emit the pending candidate under the current role if it is long enough."""
        nonlocal pending
        if pending is not None and len(pending) > min_claim_chars:
            claims.append({
                "party_role": current_role,
                "claim_text": pending,
                "claim_index": len(claims),
            })
        pending = None

    for line in text.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue

        # Party header — must be a SHORT line (header, not claim content)
        role = detect_party_role(stripped) if len(stripped.split()) <= 8 else None
        if role:
            # Flush first so the pending claim keeps the previous party's role
            flush_pending()
            current_role = role
            continue

        # Generic section headers end the pending claim and are skipped
        if is_section_header(stripped):
            flush_pending()
            continue

        # Each remaining paragraph is its own claim. Numbered sub-headers
        # ("2. שיעור ההפקעה") need no special case: they are handled exactly
        # like any other paragraph.
        flush_pending()
        pending = stripped

    flush_pending()
    return claims
|
||||
|
||||
|
||||
async def main():
    """Rebuild the claims table from every non-empty block-zayin.

    For each row returned by the block-zayin query, the claims are extracted
    with extract_claims_from_block, the case's existing claims are deleted and
    the new ones inserted. Both steps run in one transaction, so a failure
    cannot leave the case without claims. Per-case counts and a final summary
    by party role are printed to stdout.
    """
    # Display names for the roles in the per-case report
    role_labels = {
        "appellant": "עוררים",
        "committee": "ועדה מקומית",
        "respondent": "משיבים",
        "permit_applicant": "מבקשי היתר",
        "appraiser": "שמאי",
    }

    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            # Get all block-zayin with content
            rows = await conn.fetch(
                """SELECT c.id as case_id, c.case_number, c.title,
                          db.content
                   FROM decision_blocks db
                   JOIN decisions d ON d.id = db.decision_id
                   JOIN cases c ON c.id = d.case_id
                   WHERE db.block_id = 'block-zayin' AND db.word_count > 0
                   ORDER BY c.case_number"""
            )

        for row in rows:
            case_id = row["case_id"]
            case_number = row["case_number"]

            claims = extract_claims_from_block(row["content"])

            print(f"\n{'='*50}")
            print(f"תיק: {case_number} — {row['title']}")
            print(f"{'='*50}")

            async with pool.acquire() as conn:
                # NOTE(review): the delete is keyed by case_id, so if the query
                # returns several rows for one case, only the last row's claims
                # survive — confirm each case has a single block-zayin.
                async with conn.transaction():
                    # Delete existing claims for this case
                    await conn.execute("DELETE FROM claims WHERE case_id = $1", case_id)

                    for claim in claims:
                        await conn.execute(
                            """INSERT INTO claims (case_id, party_role, claim_text, claim_index, source_document)
                               VALUES ($1, $2, $3, $4, $5)""",
                            case_id,
                            claim["party_role"],
                            claim["claim_text"],
                            claim["claim_index"],
                            "block-zayin",
                        )

            role_counts = {}
            for claim in claims:
                role = claim["party_role"]
                role_counts[role] = role_counts.get(role, 0) + 1

            for role, count in sorted(role_counts.items()):
                role_heb = role_labels.get(role, role)
                print(f" {role_heb:20s} — {count} טענות")

            print(f" סה\"כ: {len(claims)} טענות")

        # Summary
        async with pool.acquire() as conn:
            total = await conn.fetchval("SELECT count(*) FROM claims")
            by_role = await conn.fetch(
                "SELECT party_role, count(*) as cnt FROM claims GROUP BY party_role ORDER BY cnt DESC"
            )
    finally:
        # Release the pool even when extraction or a query fails
        await close_pool()

    print(f"\n{'='*50}")
    print(f"סיכום כללי — {total} טענות מ-{len(rows)} החלטות")
    for r in by_role:
        print(f" {r['party_role']:20s} — {r['cnt']}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
177
scripts/generate-embeddings.py
Normal file
177
scripts/generate-embeddings.py
Normal file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate embeddings for decision blocks and case law.
|
||||
|
||||
Creates:
|
||||
- paragraph_embeddings: for each decision block with content
|
||||
- case_law_embeddings: for each case law summary
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
from legal_mcp.services.embeddings import embed_texts
|
||||
from legal_mcp import config
|
||||
|
||||
|
||||
def _split_into_chunks(
    content: str,
    *,
    single_chunk_max_words: int = 600,
    window_words: int = 500,
    stride_words: int = 400,
    min_chunk_words: int = 50,
) -> list[str]:
    """Split block text into the pieces that get embedded.

    Text of at most ``single_chunk_max_words`` words is returned unchanged as
    a single chunk. Longer text is cut into windows of ``window_words`` words
    that start every ``stride_words`` words (so consecutive windows overlap);
    windows of ``min_chunk_words`` words or fewer are dropped.
    """
    words = content.split()
    if len(words) <= single_chunk_max_words:
        return [content]

    chunks = []
    for start in range(0, len(words), stride_words):
        window = words[start:start + window_words]
        if len(window) > min_chunk_words:
            chunks.append(" ".join(window))
    return chunks


async def generate_block_embeddings(conn) -> int:
    """Generate embeddings for decision blocks.

    Creates decision_paragraphs records from the content of every block with
    more than 10 words (the four header blocks alef–dalet are excluded), then
    stores one paragraph_embeddings row per newly created paragraph.

    Paragraph creation and embedding storage share one transaction: if the
    embedding call fails, the new paragraphs are rolled back instead of being
    left without embeddings, which a later run would skip via ON CONFLICT.

    Args:
        conn: Database connection used for all queries.

    Returns:
        The number of embeddings stored.

    Raises:
        RuntimeError: If embed_texts returns a different number of vectors
            than texts it was given.
    """
    blocks = await conn.fetch(
        """SELECT db.id as block_id, db.decision_id, db.block_id as block_type,
                  db.content, db.word_count, c.case_number
           FROM decision_blocks db
           JOIN decisions d ON d.id = db.decision_id
           JOIN cases c ON c.id = d.case_id
           WHERE db.word_count > 10
             AND db.block_id NOT IN ('block-alef', 'block-bet', 'block-gimel', 'block-dalet')
           ORDER BY c.case_number, db.block_index"""
    )

    if not blocks:
        print(" אין בלוקים ליצירת embeddings")
        return 0

    print(f" מעבד {len(blocks)} בלוקים...")

    async with conn.transaction():
        # Create paragraphs and collect texts for embedding
        para_records = []
        # Numbering runs across all blocks and advances for every chunk,
        # inserted or not, so a re-run assigns each chunk the same number.
        para_number = 1

        for block in blocks:
            for chunk_text in _split_into_chunks(block["content"]):
                # "block_id" is the decision_blocks primary key (db.id in the query)
                para_id = await conn.fetchval(
                    """INSERT INTO decision_paragraphs
                       (block_id, paragraph_number, content, word_count)
                       VALUES ($1, $2, $3, $4)
                       ON CONFLICT DO NOTHING
                       RETURNING id""",
                    block["block_id"],
                    para_number,
                    chunk_text,
                    len(chunk_text.split()),
                )
                # No id comes back when the insert was skipped by ON CONFLICT
                if para_id:
                    para_records.append({
                        "para_id": para_id,
                        "text": chunk_text,
                        "case_number": block["case_number"],
                    })
                para_number += 1

        if not para_records:
            print(" אין פסקאות חדשות")
            return 0

        print(f" {len(para_records)} פסקאות נוצרו, מייצר embeddings...")

        # All texts are handed to embed_texts in a single call
        texts = [p["text"] for p in para_records]
        embeddings = await embed_texts(texts, input_type="document")

        if len(embeddings) != len(para_records):
            # zip() would silently drop the paragraphs that got no vector
            raise RuntimeError(
                f"embed_texts returned {len(embeddings)} embeddings for {len(para_records)} texts"
            )

        # Store embeddings
        for para, embedding in zip(para_records, embeddings):
            await conn.execute(
                """INSERT INTO paragraph_embeddings (paragraph_id, embedding)
                   VALUES ($1, $2)""",
                para["para_id"],
                embedding,
            )

    return len(para_records)
|
||||
|
||||
|
||||
async def generate_case_law_embeddings(conn) -> int:
    """Embed every case_law row that has a summary or key quote and no embedding yet.

    Returns the number of case_law_embeddings rows inserted.
    """
    candidates = await conn.fetch(
        """SELECT id, case_number, case_name, summary, key_quote
           FROM case_law
           WHERE summary != '' OR key_quote != ''"""
    )

    # Skip case law that already has an embedding row
    embedded_rows = await conn.fetch("SELECT case_law_id FROM case_law_embeddings")
    already_embedded = {row["case_law_id"] for row in embedded_rows}
    pending = [row for row in candidates if row["id"] not in already_embedded]

    if not pending:
        print(" אין פסיקה חדשה ליצירת embeddings")
        return 0

    print(f" מייצר embeddings ל-{len(pending)} תקדימים...")

    def headline(row) -> str:
        """Case number, name and summary — the text stored as chunk_text."""
        return f"{row['case_number']} {row['case_name']}: {row['summary']}"

    # The embedded text also carries the key quote when there is one; the
    # stored chunk_text (below) does not.
    texts = [
        headline(row) + (f" ציטוט: {row['key_quote']}" if row["key_quote"] else "")
        for row in pending
    ]

    vectors = await embed_texts(texts, input_type="document")

    stored = 0
    for row, vector in zip(pending, vectors):
        await conn.execute(
            """INSERT INTO case_law_embeddings (case_law_id, chunk_text, embedding)
               VALUES ($1, $2, $3)""",
            row["id"],
            headline(row),
            vector,
        )
        stored += 1

    return stored
|
||||
|
||||
|
||||
async def main():
    """Generate block and case-law embeddings, then print the totals.

    Both steps run on one pooled connection. The pool is closed even when a
    step fails.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            print("שלב 1: embeddings לבלוקי החלטה")
            block_count = await generate_block_embeddings(conn)
            print(f" ✅ {block_count} embeddings נוצרו")

            print("\nשלב 2: embeddings לפסיקה")
            cl_count = await generate_case_law_embeddings(conn)
            print(f" ✅ {cl_count} embeddings נוצרו")

            # Summary
            para_total = await conn.fetchval("SELECT count(*) FROM paragraph_embeddings")
            cl_total = await conn.fetchval("SELECT count(*) FROM case_law_embeddings")
    finally:
        # Release the pool even when embedding generation raises
        await close_pool()

    print("\nסיכום:")
    print(f" סה\"כ paragraph_embeddings: {para_total}")
    print(f" סה\"כ case_law_embeddings: {cl_total}")
    print(f" מודל: {config.VOYAGE_MODEL}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
202
scripts/import-final-decisions.py
Normal file
202
scripts/import-final-decisions.py
Normal file
@@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Import 6 final signed decisions: extract text, store in DB."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
import fitz # PyMuPDF
|
||||
from docx import Document as DocxDocument
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
# 6 Final Decisions
# ═══════════════════════════════════════════════════════════════════
#
# One entry per decision file to import. Fields, as used by main():
#   case_number   — looked up in cases.case_number; entries with no match are skipped
#   file_path     — source .pdf/.docx, relative to PROJECT_ROOT
#   title         — stored as the document title (and as outcome_summary on new decisions)
#   outcome       — written to decisions.outcome and cases.expected_outcome
#                   ("" presumably means not recorded yet — TODO confirm)
#   decision_date — written to decisions.decision_date; None when not set

FINAL_DECISIONS = [
    {
        "case_number": "1180-1181",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר 1180-1181 הכט/החלטה/הכט 1180-1181.pdf",
        "title": "החלטה סופית — הכט 1180-1181",
        "outcome": "rejected",
        "decision_date": date(2026, 2, 5),
    },
    {
        "case_number": "8255-25",
        "file_path": "legacy/dafna-tamir/04_Archive/בל\"מ 8255-25 אפרים אבי נ' הוועדה המקומית לתכנון ובניה/החלטה/אליהו הרנון - להפצה.docx",
        "title": "החלטה סופית — אפרים אבי 8255-25",
        "outcome": "rejected",
        "decision_date": None,
    },
    {
        "case_number": "8007-24",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר 8007-24-עומר דרוויש-ערר על שומה מכרעת/החלטה/החלטה-סופית.docx",
        "title": "החלטה סופית — עומר דרוויש 8007-24",
        "outcome": "",
        "decision_date": None,
    },
    {
        "case_number": "1113/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1113-25-אייל-מבורך/החלטה/החלטה-1113-25-טיוטה-סופית.docx",
        "title": "החלטה סופית — מבורך 1113-25",
        "outcome": "",
        "decision_date": None,
    },
    {
        "case_number": "1126/25+1141/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1126-25-תמא-38-בית-הכרם/החלטה/בית הכרם-טיוטת החלטה-9.pdf",
        "title": "החלטה סופית — בית הכרם 1126/25",
        "outcome": "partial",
        "decision_date": date(2026, 3, 1),
    },
    {
        "case_number": "1128/25",
        "file_path": "legacy/dafna-tamir/04_Archive/ערר-1128-25-שטרית/החלטה/1128-25 החלטה להפצה.pdf",
        "title": "החלטה סופית — שטרית 1128-25",
        "outcome": "",
        "decision_date": None,
    },
]

# The directory above the one containing this script; file_path values are joined onto it.
PROJECT_ROOT = Path(__file__).parent.parent
|
||||
|
||||
|
||||
def extract_pdf_text(file_path: Path) -> str:
    """Extract text from PDF using PyMuPDF.

    Args:
        file_path: Path of the PDF file.

    Returns:
        The text of all pages, in page order, joined with newlines.
    """
    # The context manager closes the document even if a page fails to extract
    with fitz.open(str(file_path)) as doc:
        return "\n".join(page.get_text() for page in doc)
|
||||
|
||||
|
||||
def extract_docx_text(file_path: Path) -> str:
    """Return the non-blank paragraphs of a DOCX file, joined with newlines."""
    document = DocxDocument(str(file_path))

    non_blank = []
    for paragraph in document.paragraphs:
        # Whitespace-only paragraphs are dropped; kept ones are not trimmed
        if paragraph.text.strip():
            non_blank.append(paragraph.text)

    return "\n".join(non_blank)
|
||||
|
||||
|
||||
def extract_text(file_path: Path) -> str:
    """Extract the text of a PDF or DOCX file, chosen by its extension.

    The extension is compared case-insensitively.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    extension = file_path.suffix.lower()

    if extension == ".pdf":
        return extract_pdf_text(file_path)
    if extension == ".docx":
        return extract_docx_text(file_path)

    raise ValueError(f"Unsupported format: {extension}")
|
||||
|
||||
|
||||
def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in ``text``."""
    tokens = text.split()
    return len(tokens)
|
||||
|
||||
|
||||
def _pdf_page_count(file_path: Path):
    """Return the page count of a PDF, or None for any other file type.

    The extension check is case-insensitive, matching extract_text, and the
    document is closed after counting.
    """
    if file_path.suffix.lower() != ".pdf":
        return None
    with fitz.open(str(file_path)) as pdf:
        return len(pdf)


async def main():
    """Import the decisions listed in FINAL_DECISIONS into the database.

    For every entry whose file exists and whose case is found in ``cases``:
    the extracted text is stored on a ``documents`` row (updated if one with
    the same file path exists, otherwise inserted), the case's ``decisions``
    row is updated or created with status 'final', and the case itself is
    marked 'final'. The three writes run in one transaction per case.
    Progress and a final summary are printed to stdout.
    """
    await init_schema()
    pool = await get_pool()

    try:
        for d in FINAL_DECISIONS:
            file_path = PROJECT_ROOT / d["file_path"]
            if not file_path.exists():
                print(f"❌ קובץ לא נמצא: {file_path}")
                continue

            # Extract text
            print(f"\nמחלץ טקסט: {d['title']}...")
            text = extract_text(file_path)
            word_count = count_words(text)
            print(f" {word_count} מילים, {len(text)} תווים")

            async with pool.acquire() as conn:
                # Get case_id
                case_id = await conn.fetchval(
                    "SELECT id FROM cases WHERE case_number = $1", d["case_number"]
                )
                if not case_id:
                    print(f" ⚠ תיק {d['case_number']} לא נמצא ב-DB — מדלג")
                    continue

                # Document, decision and case are written together or not at all
                async with conn.transaction():
                    # Register document. The lookup key is the full path built
                    # from PROJECT_ROOT, so it depends on where the repo lives.
                    existing_doc = await conn.fetchval(
                        "SELECT id FROM documents WHERE file_path = $1",
                        str(file_path),
                    )
                    if existing_doc:
                        doc_id = existing_doc
                        print(f" מסמך כבר קיים ב-DB: {doc_id}")
                        # Update text
                        await conn.execute(
                            """UPDATE documents SET extracted_text = $1, extraction_status = 'completed'
                               WHERE id = $2""",
                            text, doc_id,
                        )
                    else:
                        doc_id = await conn.fetchval(
                            """INSERT INTO documents (case_id, doc_type, title, file_path, extracted_text, extraction_status, page_count)
                               VALUES ($1, 'decision', $2, $3, $4, 'completed', $5)
                               RETURNING id""",
                            case_id, d["title"], str(file_path), text,
                            _pdf_page_count(file_path),
                        )
                        print(f" מסמך נרשם: {doc_id}")

                    # Create/update decision record
                    existing_decision = await conn.fetchval(
                        "SELECT id FROM decisions WHERE case_id = $1", case_id
                    )
                    if existing_decision:
                        await conn.execute(
                            """UPDATE decisions SET status = 'final', outcome = $1, total_words = $2,
                               decision_date = $3, updated_at = now() WHERE id = $4""",
                            d["outcome"], word_count, d["decision_date"], existing_decision,
                        )
                        decision_id = existing_decision
                        print(f" החלטה עודכנה: {decision_id}")
                    else:
                        decision_id = await conn.fetchval(
                            """INSERT INTO decisions (case_id, version, status, outcome, outcome_summary,
                                                      total_words, decision_date, author)
                               VALUES ($1, 1, 'final', $2, $3, $4, $5, 'דפנה תמיר')
                               RETURNING id""",
                            case_id, d["outcome"], d["title"], word_count, d["decision_date"],
                        )
                        print(f" החלטה נוצרה: {decision_id}")

                    # Update case status
                    await conn.execute(
                        "UPDATE cases SET status = 'final', expected_outcome = $1, updated_at = now() WHERE id = $2",
                        d["outcome"], case_id,
                    )

                print(f" ✅ הושלם: {d['case_number']}")

        # Summary
        async with pool.acquire() as conn:
            doc_count = await conn.fetchval(
                "SELECT count(*) FROM documents WHERE doc_type = 'decision' AND extraction_status = 'completed'"
            )
            dec_count = await conn.fetchval(
                "SELECT count(*) FROM decisions WHERE status = 'final'"
            )
            # sum() is NULL when no final decision exists; report 0 instead of
            # failing on the thousands-separator format below
            total_words = await conn.fetchval(
                "SELECT sum(total_words) FROM decisions WHERE status = 'final'"
            ) or 0
    finally:
        # Release the pool even when extraction or a query fails
        await close_pool()

    print(f"\n{'='*50}")
    print(f"✅ סה\"כ מסמכי החלטה: {doc_count}")
    print(f"✅ סה\"כ החלטות סופיות: {dec_count}")
    print(f"✅ סה\"כ מילים: {total_words:,}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
118
scripts/link-claims-to-discussion.py
Normal file
118
scripts/link-claims-to-discussion.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Link claims to discussion paragraphs using semantic similarity.
|
||||
|
||||
For each claim, finds the most similar paragraph in block-yod of the same decision.
|
||||
Updates claims.addressed_in_paragraph with the paragraph number.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
from legal_mcp.services.embeddings import embed_texts
|
||||
|
||||
|
||||
def _vector_norm(vector) -> float:
    """Return the Euclidean norm of an embedding vector."""
    return sum(x * x for x in vector) ** 0.5


def _best_paragraph_match(claim_emb, disc_embeddings, disc_norms):
    """Return (index, cosine score) of the discussion paragraph closest to a claim.

    A zero-length vector scores 0. Ties keep the earliest paragraph. With no
    paragraphs the result is (-1, -1).
    """
    claim_norm = _vector_norm(claim_emb)  # computed once per claim, not per paragraph
    best_idx = -1
    best_score = -1
    for idx, (disc_emb, disc_norm) in enumerate(zip(disc_embeddings, disc_norms)):
        dot = sum(a * b for a, b in zip(claim_emb, disc_emb))
        score = dot / (claim_norm * disc_norm) if claim_norm > 0 and disc_norm > 0 else 0
        if score > best_score:
            best_score = score
            best_idx = idx
    return best_idx, best_score


async def main():
    """Link each claim to the most similar discussion paragraph of its case.

    For every case that has claims and a non-empty block-yod, the claims
    (first 500 characters each) and the block-yod paragraphs longer than 30
    characters are embedded together. Each claim whose best cosine similarity
    exceeds 0.3 gets claims.addressed_in_paragraph set to the 1-based position
    of that paragraph in the filtered paragraph list. Progress and a final
    summary are printed to stdout.
    """
    min_score = 0.3

    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            # Get all cases with both claims and discussion blocks
            cases = await conn.fetch(
                """SELECT DISTINCT c.id as case_id, c.case_number
                   FROM cases c
                   JOIN claims cl ON cl.case_id = c.id
                   JOIN decisions d ON d.case_id = c.id
                   JOIN decision_blocks db ON db.decision_id = d.id AND db.block_id = 'block-yod' AND db.word_count > 0
                   ORDER BY c.case_number"""
            )

        for case in cases:
            case_id = case["case_id"]
            case_number = case["case_number"]

            async with pool.acquire() as conn:
                # Get claims for this case
                claims = await conn.fetch(
                    "SELECT id, claim_text, party_role, claim_index FROM claims WHERE case_id = $1 ORDER BY claim_index",
                    case_id,
                )

                # Get the discussion block; fetchval takes the first row returned
                yod_content = await conn.fetchval(
                    """SELECT db.content FROM decision_blocks db
                       JOIN decisions d ON d.id = db.decision_id
                       WHERE d.case_id = $1 AND db.block_id = 'block-yod'""",
                    case_id,
                )

            if not yod_content or not claims:
                continue

            # Split discussion into paragraphs, dropping very short lines
            disc_paragraphs = [p.strip() for p in yod_content.split("\n") if p.strip() and len(p.strip()) > 30]

            if not disc_paragraphs:
                continue

            print(f"\n{case_number}: {len(claims)} טענות ← {len(disc_paragraphs)} פסקאות דיון")

            # Embed all claims and discussion paragraphs in one call
            claim_texts = [c["claim_text"][:500] for c in claims]
            all_texts = claim_texts + disc_paragraphs

            embeddings = await embed_texts(all_texts, input_type="document")

            claim_embeddings = embeddings[:len(claims)]
            disc_embeddings = embeddings[len(claims):]
            disc_norms = [_vector_norm(emb) for emb in disc_embeddings]

            # For each claim, find the best matching discussion paragraph
            linked = 0
            async with pool.acquire() as conn:
                for claim, claim_emb in zip(claims, claim_embeddings):
                    best_para_idx, best_score = _best_paragraph_match(
                        claim_emb, disc_embeddings, disc_norms
                    )

                    if best_para_idx >= 0 and best_score > min_score:
                        # paragraph_number is 1-indexed and counts only the
                        # paragraphs that survived the length filter above
                        para_num = best_para_idx + 1
                        await conn.execute(
                            "UPDATE claims SET addressed_in_paragraph = $1 WHERE id = $2",
                            para_num, claim["id"],
                        )
                        linked += 1

            print(f" קושרו: {linked}/{len(claims)} טענות (ציון מינימלי: 0.3)")

        # Summary
        async with pool.acquire() as conn:
            total_claims = await conn.fetchval("SELECT count(*) FROM claims")
            linked_claims = await conn.fetchval("SELECT count(*) FROM claims WHERE addressed_in_paragraph IS NOT NULL")
    finally:
        # Release the pool even when embedding or a query fails
        await close_pool()

    # An empty claims table would otherwise divide by zero
    linked_pct = linked_claims / total_claims * 100 if total_claims else 0

    print(f"\n{'='*50}")
    print(f"סיכום: {linked_claims}/{total_claims} טענות קושרו לפסקאות דיון ({linked_pct:.0f}%)")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
254
scripts/seed-appeals.py
Normal file
254
scripts/seed-appeals.py
Normal file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Seed appeals (cases) from legacy vault metadata."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
APPEALS = [
|
||||
# ── Active (01_Projects) ──
|
||||
{
|
||||
"case_number": "1130/25",
|
||||
"title": "ערר קרית יערים-1 — קובר",
|
||||
"appellants": ["מרק קובר", "יצחק מטמון"],
|
||||
"respondents": ["הוועדה המרחבית הראל", "ליבמן"],
|
||||
"subject": "ערר על אישור תכנית להוספת קומה וזכויות בנייה",
|
||||
"property_address": "רח' אבינדב 23, קריית יערים",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "partial",
|
||||
},
|
||||
{
|
||||
"case_number": "1194/25+1199/25",
|
||||
"title": "ערר קרית יערים-2 — מטמון/קובר",
|
||||
"appellants": ["יצחק מטמון", "מרק קובר"],
|
||||
"respondents": ["הוועדה המקומית"],
|
||||
"subject": "תוספת קומה + הגדלת זכויות בנייה",
|
||||
"property_address": "חלקה 240, גוש 29536, רח' אבינדב",
|
||||
"status": "new",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "8027-25",
|
||||
"title": "ערר היטל השבחה תחכמוני 20",
|
||||
"appellants": ["עובדיה", "מירב", "ווינשטיין ואח'"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "היטל השבחה",
|
||||
"property_address": "רח' תחכמוני, ירושלים, גוש 30069, חלקה 156",
|
||||
"status": "new",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
# ── Archived — completed decisions ──
|
||||
{
|
||||
"case_number": "1180-1181",
|
||||
"title": "ערר הכט",
|
||||
"appellants": [],
|
||||
"respondents": [],
|
||||
"subject": "רישוי ובנייה",
|
||||
"property_address": "",
|
||||
"status": "final",
|
||||
"expected_outcome": "rejected",
|
||||
"notes": "פורסם 05.02.2026. דחייה. שימש כמודל לניתוח סגנון.",
|
||||
},
|
||||
{
|
||||
"case_number": "1126/25+1141/25",
|
||||
"title": "תמ\"א 38/2 בית הכרם",
|
||||
"appellants": ["מרכז קהילתי זיו-מרקס", "12 תושבים"],
|
||||
"respondents": ["הוועדה המקומית", "יזם"],
|
||||
"subject": "תמ\"א 38/2 הריסה ובנייה מחדש",
|
||||
"property_address": "רח' החלוץ 36, בית הכרם, גוש 30159/6",
|
||||
"status": "final",
|
||||
"expected_outcome": "partial",
|
||||
"notes": "גרסה סופית טיוטה 9, מרץ 2026. קבלה חלקית. שימש כמודל לניתוח סגנון.",
|
||||
},
|
||||
{
|
||||
"case_number": "8255-25",
|
||||
"title": "בל\"מ אפרים אבי",
|
||||
"appellants": ["אפרים אברהם"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "היטל השבחה — בקשה להארכת מועד",
|
||||
"property_address": "רח' הורקניה 4, קטמונים, ירושלים",
|
||||
"status": "final",
|
||||
"expected_outcome": "rejected",
|
||||
"notes": "גרסה סופית מאושרת. דחייה.",
|
||||
},
|
||||
# ── Archived — unified decisions ──
|
||||
{
|
||||
"case_number": "8107-25",
|
||||
"title": "אבו זאהריה",
|
||||
"appellants": ["אבו זאהריה מפיד"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "ערר על החלטת שמאי מכריע — היטל השבחה",
|
||||
"property_address": "רח' אום כולתום 26, בית חנינא, גוש 30615, חלקה 69",
|
||||
"status": "final",
|
||||
"expected_outcome": "",
|
||||
"notes": "החלטה מאחדת: ערר גפני.",
|
||||
},
|
||||
{
|
||||
"case_number": "9005-24",
|
||||
"title": "רמת שלמה — פיצויים ס' 197",
|
||||
"appellants": ["קירמאיר אסתר ואח' (63-67 עוררים)"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "פיצויים לפי סעיף 197",
|
||||
"property_address": "רמת שלמה, ירושלים, גוש 30561, חלקות 36, 40",
|
||||
"status": "final",
|
||||
"expected_outcome": "",
|
||||
"notes": "החלטה מאחדת: ערר ורדי 9003-23.",
|
||||
},
|
||||
# ── Archived — in progress ──
|
||||
{
|
||||
"case_number": "1113/25",
|
||||
"title": "אייל מבורך לוי ואברהם עדי",
|
||||
"appellants": ["אייל מבורך לוי", "אברהם עדי"],
|
||||
"respondents": ["הוועדה המקומית הראל"],
|
||||
"subject": "הרחבת דירות + תוספת 2 יח\"ד",
|
||||
"property_address": "רח' השלום 63, מבשרת ציון, גוש 30475, חלקה 5",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "1128/25",
|
||||
"title": "שטרית",
|
||||
"appellants": [],
|
||||
"respondents": [],
|
||||
"subject": "",
|
||||
"property_address": "",
|
||||
"status": "drafted",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "1107/06/25",
|
||||
"title": "בלוי נ' הוועדה המקומית",
|
||||
"appellants": ["בלוי מאיר", "מזיע מאיר", "דזימיטרובסקי הדסה"],
|
||||
"respondents": ["הוועדה המקומית ירושלים", "היזם"],
|
||||
"subject": "תוספת בנייה וחיזוק מפני רעידות (תמ\"א 38/1)",
|
||||
"property_address": "רח' הרב בלוי 16, ירושלים, גוש 30099/115",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "8141-23",
|
||||
"title": "אזורים בנין",
|
||||
"appellants": ["אזורים בנין (1965) בע\"מ"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "היטל השבחה — תכנית 101-0611905",
|
||||
"property_address": "רח' הנביאים 27, ירושלים",
|
||||
"status": "drafted",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "8047-24",
|
||||
"title": "משכן אליהו — היטל השבחה שמאי מכריע",
|
||||
"appellants": ["עומר דרוויש"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "ערר על שמאית מכריעה — היטל השבחה",
|
||||
"property_address": "גוש 30614, חלקה 89, בית חנינא",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "1195-25",
|
||||
"title": "וליד ג'מל",
|
||||
"appellants": ["וליד ג'מל"],
|
||||
"respondents": ["ועדת משנה מטה יהודה", "סמיר מוסא זעאתרה"],
|
||||
"subject": "הסדרת קומה שלישית למשרדים",
|
||||
"property_address": "גוש 30492, חלקה 23, כפר עין נקובא",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "1200/25",
|
||||
"title": "קרית ענבים נופש",
|
||||
"appellants": ["קרית ענבים נופש בע\"מ"],
|
||||
"respondents": ["הוועדה המקומית מטה יהודה", "חברי קיבוץ קרית ענבים"],
|
||||
"subject": "שימוש חורג — סופרמרקט בייעוד ספורט ונופש",
|
||||
"property_address": "קיבוץ קרית ענבים, גוש 29551",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "1184/25",
|
||||
"title": "שטוקהיים — בית נקופה",
|
||||
"appellants": ["אמנון שטוקהיים", "אילנית שטוקהיים"],
|
||||
"respondents": ["הוועדה המקומית מטה יהודה", "יערה טל"],
|
||||
"subject": "אישור בקשה להיתר עם הקלות",
|
||||
"property_address": "מגרש 51, גוש 31399, חלקה 52, בית נקופה",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "8070-25",
|
||||
"title": "היטל השבחה — דירת גג",
|
||||
"appellants": ["חיים ראם"],
|
||||
"respondents": ["הוועדה המקומית ירושלים"],
|
||||
"subject": "היטל השבחה — הקלה להשלמת דירת גג",
|
||||
"property_address": "רח' צ.פ. חיות 2, דירה 31, נווה יעקב",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "8136-24",
|
||||
"title": "ערר השבחה — מרפסות שירות",
|
||||
"appellants": [],
|
||||
"respondents": [],
|
||||
"subject": "היטל השבחה — מרפסות שירות",
|
||||
"property_address": "",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
{
|
||||
"case_number": "8007-24",
|
||||
"title": "עומר דרוויש — שומה מכרעת",
|
||||
"appellants": [],
|
||||
"respondents": [],
|
||||
"subject": "היטל השבחה",
|
||||
"property_address": "",
|
||||
"status": "in_progress",
|
||||
"expected_outcome": "",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
async def main():
    """Seed the ``cases`` table from the module-level ``APPEALS`` list.

    Runs ``init_schema()``, then walks ``APPEALS`` and inserts every entry
    whose ``case_number`` is not already present in ``cases``. Entries that
    already exist are counted as skipped and left untouched, so re-running
    the script does not duplicate rows. A one-line summary is printed at the
    end.

    The pool is closed in a ``finally`` block so that a failing query does
    not leave the pool open.
    """
    await init_schema()
    pool = await get_pool()

    inserted = 0
    skipped = 0
    try:
        async with pool.acquire() as conn:
            for appeal in APPEALS:
                existing = await conn.fetchval(
                    "SELECT id FROM cases WHERE case_number = $1",
                    appeal["case_number"],
                )
                if existing:
                    skipped += 1
                    continue
                await conn.execute(
                    """INSERT INTO cases
                       (case_number, title, appellants, respondents, subject,
                        property_address, status, expected_outcome, notes)
                       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
                    appeal["case_number"],
                    appeal["title"],
                    # Party lists are serialized to JSON text before binding.
                    json.dumps(appeal.get("appellants", [])),
                    json.dumps(appeal.get("respondents", [])),
                    appeal.get("subject", ""),
                    appeal.get("property_address", ""),
                    appeal.get("status", "new"),
                    appeal.get("expected_outcome", ""),
                    # "notes" is absent from several APPEALS entries.
                    appeal.get("notes", ""),
                )
                inserted += 1
    finally:
        # Release the pool even when a query raised part-way through.
        await close_pool()

    print(f"✓ appeals: {inserted} inserted, {skipped} skipped (already exist)")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
449
scripts/seed-knowledge.py
Normal file
449
scripts/seed-knowledge.py
Normal file
@@ -0,0 +1,449 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Seed knowledge tables from legacy vault data.
|
||||
|
||||
Imports: lessons_learned, transition_phrases, case_law, statutory_provisions.
|
||||
Sources: memory/legal-decision-lessons.md, skill-legal-decision/SKILL.md
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add mcp-server to path so we can reuse db module
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Data: Lessons Learned
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
LESSONS = [
|
||||
# --- הכט 1180-1181 (rejected, 02.2026) ---
|
||||
{
|
||||
"lesson_title": "Discussion = continuous essay, no sub-headers",
|
||||
"lesson_text": "הדיון נקרא כחיבור משפטי רציף עם סעיפים ממוספרים, לא כמתווה מובנה עם כותרות משנה. הגרסה המפורסמת של הכט השתמשה באפס כותרות משנה בדיון, בעוד הטיוטה שלנו הכילה 6 כותרות H2.",
|
||||
"category": "structure",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "הכט 1180-1181",
|
||||
"severity": "critical",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Citation through consolidating decision",
|
||||
"lesson_text": "להשתמש בהחלטה מאחדת קודמת (כמו ערר נגאח 1011-03-25) לצטט מספר תקדימים בפסקה אחת ארוכה (~600 מילים), במקום לצטט כל תקדים בפסקה נפרדת.",
|
||||
"category": "style",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "הכט 1180-1181",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Paragraph length variation in discussion",
|
||||
"lesson_text": "לא לפרגמנט טיעונים משפטיים ארוכים לפסקאות זהות וקצרות. לגוון אורך פסקאות מ-20 עד 600+ מילים. פסקאות ציטוט מרכזיות ארוכות מאוד.",
|
||||
"category": "style",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "הכט 1180-1181",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Opening formula promises both conclusion AND elaboration",
|
||||
"lesson_text": 'פתיחת הדיון צריכה להבטיח גם מסקנה וגם הרחבה: "לאחר שבחנו... החלטנו בשלב ראשון כי... אך יחד עם זאת ועל מנת לא לצאת בחסר... מצאנו להוסיף מספר הערות"',
|
||||
"category": "style",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "הכט 1180-1181",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": 'Summary title is "סיכום"',
|
||||
"lesson_text": 'כותרת פרק הסיכום היא "סיכום" בלבד, לא "סיכום והכרעה" ולא "סוף דבר".',
|
||||
"category": "structure",
|
||||
"applies_to": ["block-yod-alef"],
|
||||
"source_case": "הכט 1180-1181",
|
||||
"severity": "nice-to-have",
|
||||
},
|
||||
# --- בית הכרם 1126/25 (partial acceptance, 03.2026) ---
|
||||
{
|
||||
"lesson_title": "Threshold question is STRATEGIC, not mandatory",
|
||||
"lesson_text": "שאלת הסף (זכות ערר לפי ס' 152) היא כלי אסטרטגי, לא חובה. כשלתיק יש שאלות מהותיות חזקות (חניה, קווי בניין, שימור), דפנה מעדיפה להתעמק בתוכן על פני חסימה פרוצדורלית. זה גם מחזק את ההחלטה מפני ביקורת שיפוטית.",
|
||||
"category": "process",
|
||||
"applies_to": ["all"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "critical",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Concentric circles = rejected appeals only",
|
||||
"lesson_text": 'מודל השכבות (עיגולים קונצנטריים, סעיף 6.3 ב-SKILL) הוא כלי אחד מתוך כמה, לא המסגרת הנדרשת. לעררים שמתקבלים חלקית, דפנה משתמשת בניתוח גמיש נושא-נושא.',
|
||||
"category": "process",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "critical",
|
||||
},
|
||||
{
|
||||
"lesson_title": "New opening type: tension mapping",
|
||||
"lesson_text": 'לקבלה חלקית או תיקים עם סוגיות מורכבות מצטלבות, פתיחת "מיפוי מתחים": רשימה של 6+ מתחים ספציפיים בתבליטים לפני הניתוח. דפוס: "בערר דנן עולות שאלות כיצד והאם..." → רשימת מתחים → "כל הנקודות לעיל עומדות לפנינו..."',
|
||||
"category": "structure",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Single building weakens TAMA 38 interest",
|
||||
"lesson_text": 'כשתמ"א 38 חלה על בית בודד (לעומת בניין דירות גדול), אינטרס החיזוק מפני רעידת אדמה חלש יותר. זה מצדיק אישור זהיר יותר של זכויות, במיוחד קווי בניין וחניה.',
|
||||
"category": "content",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Master plan as shield against ad-hoc planning",
|
||||
"lesson_text": 'כשקיימת תכנית אב — לצטט אותה כדי לתת לגיטימציה להיתר בודד. מסקנה: ההיתר "משתלב בחזון כולל קיים" במקום ליצור תקדים אד-הוק.',
|
||||
"category": "content",
|
||||
"applies_to": ["block-yod"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Deep plan provision citations for parking",
|
||||
"lesson_text": "לסוגיות חניה/תשתיות, דפנה נכנסת עמוק להוראות תכנית עם ציטוטים ישירים נרחבים (300+ מילים) וניתוח משולב. כולל מספרי סעיפים ספציפיים (לדוגמה: 6.8(4), 6.8(9), נספח תנועה, 5166b).",
|
||||
"category": "content",
|
||||
"applies_to": ["block-yod", "block-tet"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "important",
|
||||
},
|
||||
{
|
||||
"lesson_title": "Ultra-minimal summary for partial acceptance",
|
||||
"lesson_text": "בקבלה חלקית, כל ההנמקה כבר בדיון. סיכום = הוראות אופרטיביות בלבד (בדרך כלל 3 סעיפים קצרים). ללא דיון בהוצאות. ללא סיום חם.",
|
||||
"category": "structure",
|
||||
"applies_to": ["block-yod-alef"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "important",
|
||||
},
|
||||
# --- קרית יערים-1 (03.2026) ---
|
||||
{
|
||||
"lesson_title": "Neutral background rule",
|
||||
"lesson_text": 'רקע (בלוק ו) = עובדות אובייקטיביות בלבד. מבחן: האם המשפט מכיל ציטוט ישיר מצד, או מילות ערך/שיפוט (חריג, חטא, בעייתי)? אם כן → שייך בטענות (בלוק ז) או דיון (בלוק י), לא ברקע. החלטות קודמות = עובדה יבשה ("ביום X נדחתה תכנית Y"), ללא נימוקים וציטוטים.',
|
||||
"category": "structure",
|
||||
"applies_to": ["block-vav"],
|
||||
"source_case": "קרית יערים-1 (1130/25)",
|
||||
"severity": "critical",
|
||||
},
|
||||
{
|
||||
"lesson_title": "12-block mandatory structure",
|
||||
"lesson_text": 'מבנה 12 בלוקים פורמלי חובה עם שלב "טיוטת טרום-דיון". כולל: פתיחה (ה) → רקע (ו) → טענות (ז) → הליכים (ח) → תכניות (ט) → דיון (י) → סיכום (יא). חידוש מאריאלי: "ההליכים בפני ועדת הערר" כפרק נפרד. כל בלוק נכתב כאילו שופט בית משפט מנהלי קורא בפעם הראשונה.',
|
||||
"category": "structure",
|
||||
"applies_to": ["all"],
|
||||
"source_case": "קרית יערים-1 (1130/25)",
|
||||
"severity": "critical",
|
||||
},
|
||||
# --- Meta-lesson ---
|
||||
{
|
||||
"lesson_title": "Skill was over-indexed on single case type",
|
||||
"lesson_text": "ה-SKILL המקורי היה מבוסס יתר על מקרה אחד (הכט = דחייה). מודל העיגולים, שאלת סף כחובה, וסיום חם — כולם דפוסים מתיק בודד. בית הכרם (קבלה חלקית) חשף שהגישה של דפנה גמישה יותר ממה שתפסנו. צריך להבחין בין דפוסים אוניברסליים לתלויי-תוצאה.",
|
||||
"category": "process",
|
||||
"applies_to": ["all"],
|
||||
"source_case": "בית הכרם 1126/25",
|
||||
"severity": "critical",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Data: Transition Phrases
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
TRANSITION_PHRASES = [
|
||||
# From הכט
|
||||
{"phrase": "ועל מנת לא לצאת בחסר", "usage_context": "פתיחת אוביטר דיקטה / הנמקה נוספת", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "נציין כי טענות אלו נטענו בלשון רפה", "usage_context": "הכרה בטענות חלשות תוך דיון בהן", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "עינינו הרואות", "usage_context": "סיכום אחרי ציטוט ארוך", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "נוסיף.", "usage_context": "מעבר קצר ביותר (מילה אחת) לנקודה הבאה", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "אם כך, לעת הזו", "usage_context": "הסקת מסקנה מציטוטים", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "למעלה מן הצורך", "usage_context": "דיון לא הכרחי להכרעה אך נכתב מטעמים אסטרטגיים", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "למיטב הבנתנו", "usage_context": "עמדה זהירה בשאלה משפטית לא מיושבת", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
{"phrase": "נשלים ונציין", "usage_context": "נקודה אחרונה לפני מעבר לסיכום", "block_types": ["block-yod"], "source_decision": "הכט 1180-1181"},
|
||||
# From בית הכרם
|
||||
{"phrase": "הדברים משליכים על שיקול הדעת ב...", "usage_context": "קישור ממצא למסקנה", "block_types": ["block-yod"], "source_decision": "בית הכרם 1126/25"},
|
||||
{"phrase": "רוצה לומר כי", "usage_context": "ניסוח חלופי / הסבר", "block_types": ["block-yod"], "source_decision": "בית הכרם 1126/25"},
|
||||
{"phrase": "נוצר מצב בו", "usage_context": "הצגת מצב עובדתי / בעיה", "block_types": ["block-yod"], "source_decision": "בית הכרם 1126/25"},
|
||||
{"phrase": "לכך נוסיף כי", "usage_context": "הוספת שכבה נוספת לטיעון", "block_types": ["block-yod"], "source_decision": "בית הכרם 1126/25"},
|
||||
{"phrase": "יש אולי להצר על כך ש...", "usage_context": "הערה ביקורתית עדינה (כלפי רשות תכנון)", "block_types": ["block-yod"], "source_decision": "בית הכרם 1126/25"},
|
||||
{"phrase": "עם ההבנה לטענה זו של העוררים, אין בידנו לקבלה", "usage_context": "הכרה רכה בטענה תוך דחייתה", "block_types": ["block-yod"], "source_decision": "בית הכרם 1126/25"},
|
||||
# General (from SKILL.md)
|
||||
{"phrase": "ברי כי", "usage_context": "מסקנה מובנת מאליה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "נפנה ל...", "usage_context": "פתיחת ניתוח חוק/פסיקה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "מכל האמור לעיל", "usage_context": "מעבר לסיכום", "block_types": ["block-yod", "block-yod-alef"], "source_decision": ""},
|
||||
{"phrase": "נשוב על כך כי", "usage_context": "חזרה מכוונת על עיקרון חשוב", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "דא עקא", "usage_context": "הצגת בעיה מרכזית או סתירה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "ובמילים אחרות", "usage_context": "הבהרה / ניסוח מחדש", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "הגענו לכלל מסקנה כי", "usage_context": "מסקנה מרכזית (פתיחת דיון)", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "לא נוכל לקבל", "usage_context": "דחיית עמדה / טענה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "מקובלת עלינו", "usage_context": "קבלת עמדה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "התרשמנו כי", "usage_context": "מסקנה מדיון / עיון במסמכים", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "נחדד כי", "usage_context": "חידוד נקודה קודמת", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "סיכומם של דברים", "usage_context": "פתיחת סיכום מהותי לפני פרק הסיכום", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "המסקנה מכל האמור היא כי", "usage_context": "מסקנת ביניים מקיפה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "לעמדתנו", "usage_context": "עמדת הוועדה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "בנסיבות אלה", "usage_context": "מעבר מעובדות למסקנה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "נזכיר כי", "usage_context": "תזכורת לעיקרון ידוע", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "מצאנו כי", "usage_context": "קביעה עובדתית", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "שוכנענו כי", "usage_context": "קביעה לאחר בחינה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "על כן ולו רק מסיבה זו", "usage_context": "נטרול טענה חלשה לפני ניתוח עמוק", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "יחד עם זאת, מצאנו לנכון לדון בשאלה העקרונית", "usage_context": "מעבר לדיון עקרוני למרות דחייה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "משכך", "usage_context": "הסקת מסקנה מעמדה שהוצגה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "הדברים מתחדדים שעה ש...", "usage_context": "הבהרה נוספת לאור נסיבות", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "זאת ועוד", "usage_context": "הוספת נימוק", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "יתרה מכך", "usage_context": "חיזוק הנמקה קודמת", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "לאור כל האמור לעיל", "usage_context": "פתיחת סיכום סופי", "block_types": ["block-yod", "block-yod-alef"], "source_decision": ""},
|
||||
{"phrase": "נפתח בכך כי", "usage_context": "פתיחת דיון (לא מסמך)", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "נפנה בעניין זה להחלטת...", "usage_context": "הפניה לתקדים", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "ברי כי משאב הקרקע יקר לבעליו ולציבור", "usage_context": "הצדקת שימוש יעיל בקרקע", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "נסכם כי", "usage_context": "מעבר לסיכום ביניים", "block_types": ["block-yod"], "source_decision": ""},
|
||||
{"phrase": "נחזור על כך כי", "usage_context": "חזרה אמפתית על קביעה חשובה", "block_types": ["block-yod"], "source_decision": ""},
|
||||
]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Data: Case Law
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
CASE_LAW = [
|
||||
{
|
||||
"case_number": "עע\"מ 3975/22",
|
||||
"case_name": "ב. קרן-נכסים",
|
||||
"court": "בית המשפט העליון",
|
||||
"subject_tags": ["proprietary_claims", "feasibility"],
|
||||
"summary": "פסק דין מנחה בנושא בדיקת היתכנות קניינית — מתי ועדה צריכה לבחון זכויות קניין לפני מתן היתר.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר (מרכז) 1011-03-25",
|
||||
"case_name": "נגאח עבד אל קאדר",
|
||||
"court": "ועדת ערר מרכז",
|
||||
"subject_tags": ["proprietary_claims", "consolidating_decision"],
|
||||
"summary": "החלטה מאחדת בנושא טענות קנייניות — ריכזה את כל הפסיקה בנושא.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר 1071/25",
|
||||
"case_name": "מינץ",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["self_reference", "previous_decision"],
|
||||
"summary": "החלטה קודמת של ועדת הערר עצמה — שימוש כתקדים פנימי.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר 1192/18",
|
||||
"case_name": "אילן",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["preservation", "nuisance"],
|
||||
"summary": "שימור ומטרדים — איזון בין שימור מבנים לזכויות שכנים.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר 1009-02-24",
|
||||
"case_name": "מובשוביץ",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["urban_renewal", "tama_38"],
|
||||
"summary": 'התחדשות עירונית — ציטוט נרחב (~400 מילים) בהחלטת בית הכרם.',
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר 1156/18",
|
||||
"case_name": "ארד",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["construction_nuisance"],
|
||||
"summary": "מטרדי בנייה — מתי מטרד בנייה מצדיק התערבות.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר 1169/19",
|
||||
"case_name": "זוהר",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["construction_nuisance"],
|
||||
"summary": "מטרדי בנייה — המשך קו הפסיקה של ערר ארד.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר (ירושלים) 1078+1083/24",
|
||||
"case_name": "אריאלי",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["structure_example", "proceedings_block"],
|
||||
"summary": "שימשה כמודל מבני — פרק הליכים נפרד (31 סעיפים), מבנה מפורט.",
|
||||
"key_quote": "",
|
||||
},
|
||||
{
|
||||
"case_number": "ערר אדלר",
|
||||
"case_name": "אדלר",
|
||||
"court": "ועדת ערר ירושלים",
|
||||
"subject_tags": ["consolidating_decision"],
|
||||
"summary": "החלטה מאחדת שצוטטה בבית הכרם — טכניקת ציטוט דרך החלטה מרכזת.",
|
||||
"key_quote": "",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Data: Statutory Provisions
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
STATUTORY_PROVISIONS = [
|
||||
{
|
||||
"statute_name": "חוק התכנון והבנייה, תשכ\"ה-1965",
|
||||
"section_number": "152(א)(2)",
|
||||
"section_title": "זכות ערר על אישור תכנית",
|
||||
"full_text": "",
|
||||
"common_usage": "שאלת סף — האם קיימת זכות ערר. כלי אסטרטגי, לא חובה.",
|
||||
"subject_tags": ["threshold", "right_to_appeal"],
|
||||
},
|
||||
{
|
||||
"statute_name": "חוק התכנון והבנייה, תשכ\"ה-1965",
|
||||
"section_number": "149",
|
||||
"section_title": "הקלה",
|
||||
"full_text": "",
|
||||
"common_usage": "בקשות להקלה — סטייה מתכנית בניין עיר.",
|
||||
"subject_tags": ["deviation", "relief"],
|
||||
},
|
||||
{
|
||||
"statute_name": "חוק התכנון והבנייה, תשכ\"ה-1965",
|
||||
"section_number": "145",
|
||||
"section_title": "היתר בנייה",
|
||||
"full_text": "",
|
||||
"common_usage": "עררים על סירוב/אישור היתר בנייה.",
|
||||
"subject_tags": ["building_permit"],
|
||||
},
|
||||
{
|
||||
"statute_name": "חוק התכנון והבנייה, תשכ\"ה-1965",
|
||||
"section_number": "196-198",
|
||||
"section_title": "היטל השבחה",
|
||||
"full_text": "",
|
||||
"common_usage": "עררי היטל השבחה (8xxx) — חיוב בגין עליית שווי מקרקעין.",
|
||||
"subject_tags": ["betterment_levy"],
|
||||
},
|
||||
{
|
||||
"statute_name": "חוק התכנון והבנייה, תשכ\"ה-1965",
|
||||
"section_number": "197",
|
||||
"section_title": "פיצויים בגין ירידת ערך",
|
||||
"full_text": "",
|
||||
"common_usage": "עררי פיצויים (9xxx) — תביעה בגין ירידת ערך מקרקעין בשל תכנית.",
|
||||
"subject_tags": ["compensation", "depreciation"],
|
||||
},
|
||||
{
|
||||
"statute_name": "תמ\"א 38",
|
||||
"section_number": "תיקון 2 + 3",
|
||||
"section_title": "חיזוק מבנים מפני רעידות אדמה",
|
||||
"full_text": "",
|
||||
"common_usage": "חיזוק/הריסה ובנייה מחדש. אינטרס חלש יותר בבית בודד.",
|
||||
"subject_tags": ["tama_38", "seismic_reinforcement"],
|
||||
},
|
||||
{
|
||||
"statute_name": "חוק המקרקעין, תשכ\"ט-1969",
|
||||
"section_number": "71ב(א)(1)",
|
||||
"section_title": "רוב הדרוש לשינוי ברכוש משותף",
|
||||
"full_text": "",
|
||||
"common_usage": "בדיקת היתכנות קניינית — האם יש רוב לשינוי ברכוש משותף.",
|
||||
"subject_tags": ["proprietary_claims", "common_property"],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Import Logic
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
async def seed_lessons(conn) -> int:
    """Insert every entry of ``LESSONS`` that is not yet in ``lessons_learned``.

    A lesson counts as already present when a row with the same
    ``lesson_title`` exists; such entries are skipped.

    Args:
        conn: Database connection exposing awaitable ``fetchval`` and
            ``execute`` methods.

    Returns:
        The number of rows inserted by this call.
    """
    lookup_sql = "SELECT id FROM lessons_learned WHERE lesson_title = $1"
    insert_sql = """INSERT INTO lessons_learned (lesson_title, lesson_text, category, applies_to, source_case, severity)
           VALUES ($1, $2, $3, $4, $5, $6)"""

    added = 0
    for lesson in LESSONS:
        row_id = await conn.fetchval(lookup_sql, lesson["lesson_title"])
        if not row_id:
            await conn.execute(
                insert_sql,
                lesson["lesson_title"],
                lesson["lesson_text"],
                lesson["category"],
                # applies_to is a list of block ids; bound as JSON text.
                json.dumps(lesson["applies_to"]),
                lesson["source_case"],
                lesson["severity"],
            )
            added += 1
    return added
|
||||
|
||||
|
||||
async def seed_phrases(conn) -> int:
    """Insert every entry of ``TRANSITION_PHRASES`` missing from the database.

    An entry is skipped when ``transition_phrases`` already holds a row with
    the same ``phrase`` text.

    Args:
        conn: Database connection exposing awaitable ``fetchval`` and
            ``execute`` methods.

    Returns:
        The number of rows inserted by this call.
    """
    added = 0
    for entry in TRANSITION_PHRASES:
        already_there = await conn.fetchval(
            "SELECT id FROM transition_phrases WHERE phrase = $1",
            entry["phrase"],
        )
        if already_there:
            continue

        # block_types is a list; it is bound as JSON text.
        params = (
            entry["phrase"],
            entry["usage_context"],
            json.dumps(entry["block_types"]),
            entry["source_decision"],
        )
        await conn.execute(
            """INSERT INTO transition_phrases (phrase, usage_context, block_types, source_decision)
               VALUES ($1, $2, $3, $4)""",
            *params,
        )
        added += 1
    return added
|
||||
|
||||
|
||||
async def seed_case_law(conn) -> int:
    """Insert every entry of ``CASE_LAW`` that is not yet in ``case_law``.

    Entries are matched on ``case_number``; an entry whose number already
    has a row is skipped. A missing ``key_quote`` is stored as an empty
    string.

    Args:
        conn: Database connection exposing awaitable ``fetchval`` and
            ``execute`` methods.

    Returns:
        The number of rows inserted by this call.
    """
    find_sql = "SELECT id FROM case_law WHERE case_number = $1"
    insert_sql = """INSERT INTO case_law (case_number, case_name, court, subject_tags, summary, key_quote)
           VALUES ($1, $2, $3, $4, $5, $6)"""

    inserted = 0
    for ruling in CASE_LAW:
        found = await conn.fetchval(find_sql, ruling["case_number"])
        if found:
            continue
        await conn.execute(
            insert_sql,
            ruling["case_number"],
            ruling["case_name"],
            ruling["court"],
            json.dumps(ruling["subject_tags"]),
            ruling["summary"],
            ruling.get("key_quote", ""),
        )
        inserted += 1
    return inserted
|
||||
|
||||
|
||||
async def seed_statutes(conn) -> int:
    """Insert every entry of ``STATUTORY_PROVISIONS`` missing from the database.

    A provision is identified by the pair ``(statute_name, section_number)``;
    entries whose pair already has a row in ``statutory_provisions`` are
    skipped.

    Args:
        conn: Database connection exposing awaitable ``fetchval`` and
            ``execute`` methods.

    Returns:
        The number of rows inserted by this call.
    """
    added = 0
    for provision in STATUTORY_PROVISIONS:
        statute = provision["statute_name"]
        section = provision["section_number"]

        match_id = await conn.fetchval(
            """SELECT id FROM statutory_provisions
               WHERE statute_name = $1 AND section_number = $2""",
            statute,
            section,
        )
        if not match_id:
            await conn.execute(
                """INSERT INTO statutory_provisions
                   (statute_name, section_number, section_title, full_text, common_usage, subject_tags)
                   VALUES ($1, $2, $3, $4, $5, $6)""",
                statute,
                section,
                provision["section_title"],
                provision["full_text"],
                provision["common_usage"],
                json.dumps(provision["subject_tags"]),
            )
            added += 1
    return added
|
||||
|
||||
|
||||
async def main():
    """Seed the four knowledge tables and print per-table insert counts.

    Runs ``init_schema()``, then calls ``seed_lessons``, ``seed_phrases``,
    ``seed_case_law`` and ``seed_statutes`` in that order on a single pooled
    connection. Each seeder returns the number of rows it inserted; those
    counts and their total are printed once the pool has been closed.

    The pool is closed in a ``finally`` block so that a failing seeder does
    not leave the pool open. In that case the exception propagates and no
    summary is printed.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            n_lessons = await seed_lessons(conn)
            n_phrases = await seed_phrases(conn)
            n_case_law = await seed_case_law(conn)
            n_statutes = await seed_statutes(conn)
    finally:
        # Release the pool even when one of the seeders raised.
        await close_pool()

    print(f"✓ lessons_learned: {n_lessons} inserted")
    print(f"✓ transition_phrases: {n_phrases} inserted")
    print(f"✓ case_law: {n_case_law} inserted")
    print(f"✓ statutory_provisions: {n_statutes} inserted")
    print(f" Total: {n_lessons + n_phrases + n_case_law + n_statutes} records")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
40
scripts/test-search.py
Normal file
40
scripts/test-search.py
Normal file
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test semantic search functions."""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import search_similar_paragraphs, search_similar_case_law, search_precedents, init_schema
|
||||
from legal_mcp.services.embeddings import embed_query
|
||||
|
||||
|
||||
async def main():
    """Run three sample Hebrew queries through precedent search and print hits.

    For each query the text is embedded with ``embed_query`` and passed to
    ``search_precedents`` with ``limit=3``. Every hit is printed on one line
    with its rank, ``type``, ``score`` (three decimals), ``case_number`` and
    the first 120 characters of ``content`` with newlines flattened to
    spaces. A blank line separates the queries.
    """
    await init_schema()

    queries = [
        "טענות קנייניות רוב דרוש בעלי דירות רכוש משותף",
        "חניה תנועה חניות מצוקת חניה",
        "היטל השבחה שמאי מכריע התערבות",
    ]

    # NOTE(review): unlike the seed scripts, nothing here closes the DB pool;
    # confirm whether the search helpers open one that should be released.
    for query in queries:
        print(f'=== שאילתה: "{query}" ===')
        emb = await embed_query(query)
        results = await search_precedents(emb, limit=3)

        if not results:
            print(" אין תוצאות")
        else:
            for i, r in enumerate(results):
                score = r["score"]
                cn = r["case_number"]
                rtype = r["type"]
                # Keep each hit on a single terminal line.
                content = r["content"][:120].replace("\n", " ")
                print(f" {i+1}. [{rtype}] {score:.3f} | {cn} | {content}")
        print()


# Guarded so that importing this module does not trigger the searches.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
257
scripts/validate-decision.py
Normal file
257
scripts/validate-decision.py
Normal file
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Validate a decision against block-schema rules.
|
||||
|
||||
Usage: python validate-decision.py <case_number>
|
||||
|
||||
Checks:
|
||||
1. Neutral background (block-vav) — no party quotes or value words
|
||||
2. Weight compliance — blocks within expected ranges
|
||||
3. Structural integrity — all required blocks present
|
||||
4. No duplication — background (block-vav) sentences must not reappear verbatim in block-yod (claims are only counted for the report header)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# Judgmental / evaluative Hebrew terms that must not appear in the neutral
# background section. check_neutral_background() looks for each entry as a
# plain substring of every background line.
VALUE_WORDS = [
    "חריג",
    "חטא",
    "בעייתי",
    "מזעזע",
    "שערורייתי",
    "מגוחך",
    "נפשע",
    "פגום",
    "חמור",
    "מקומם",
    "בלתי סביר",
    "מופרז",
    "מגונה",
    "פסול",
    "נלוז",
    "מטריד",
]
|
||||
|
||||
# Regular-expression sources that signal a party's position being attributed
# or quoted. Each one is applied with re.search() to every background line.
QUOTE_INDICATORS = [
    r"לטענת\s+(העוררי|המשיב|מבקשי)",
    r"לדברי\s+(העוררי|המשיב|מבקשי)",
    r"העורר\s+טוען",
    r"המשיבה\s+טוענת",
    r"לשיטת\s+(העוררי|המשיב)",
]
|
||||
|
||||
# Allowed share of the decision's total word count, per block, for licensing
# appeals. Values are (low, high) percentages, both bounds inclusive.
WEIGHT_RANGES_LICENSING = {
    "block-he":       (0.5, 5),
    "block-vav":      (3, 40),
    "block-zayin":    (13, 40),
    "block-chet":     (0, 15),
    "block-tet":      (0, 15),
    "block-yod":      (30, 75),
    "block-yod-alef": (1, 10),
    "block-yod-bet":  (0, 2),
}
|
||||
|
||||
# Allowed share of the decision's total word count, per block, for betterment
# levy appeals. Values are (low, high) percentages, both bounds inclusive.
WEIGHT_RANGES_LEVY = {
    "block-he":       (0, 5),
    "block-vav":      (2, 20),
    "block-zayin":    (15, 40),
    "block-chet":     (0, 25),
    "block-tet":      (0, 15),
    "block-yod":      (25, 75),
    "block-yod-alef": (1, 10),
    "block-yod-bet":  (0, 3),
}
|
||||
|
||||
|
||||
def check_neutral_background(content: str) -> list[str]:
    """Check the background block (block-vav) for neutrality violations.

    Every line is scanned for the judgmental terms in ``VALUE_WORDS``
    (substring match) and for the party-attribution regexes in
    ``QUOTE_INDICATORS``.

    Args:
        content: Full text of the background block; may be empty or None.

    Returns:
        One Hebrew message per hit, in line order (value words before quote
        patterns within a line). Empty when nothing was found or there is no
        content.
    """
    issues: list[str] = []
    if not content:
        return issues

    for line_no, line in enumerate(content.split("\n"), start=1):
        # The excerpt is always followed by "..." in the message, even when
        # the line is shorter than 80 characters.
        excerpt = line[:80]

        # Check value words.
        # NOTE(review): plain substring matching also fires on longer words
        # that merely contain a listed term — confirm this is intended.
        for word in VALUE_WORDS:
            if word in line:
                issues.append(f"מילת שיפוט ברקע (שורה {line_no}): \"{word}\" — \"{excerpt}...\"")

        # Check party quotes; search once and reuse the match object.
        for pattern in QUOTE_INDICATORS:
            hit = re.search(pattern, line)
            if hit:
                issues.append(f"ציטוט מצד ברקע (שורה {line_no}): \"{hit.group()}\" — \"{excerpt}...\"")

    return issues
|
||||
|
||||
|
||||
def check_weight_compliance(blocks: list[dict], appeal_type: str) -> list[str]:
    """Compare each block's share of the total word count with its allowed range.

    Args:
        blocks: Block records with ``block_id``, ``title`` and ``word_count``.
        appeal_type: ``"levy"`` selects ``WEIGHT_RANGES_LEVY``; every other
            value (including ``"compensation"``) uses
            ``WEIGHT_RANGES_LICENSING``.

    Returns:
        Hebrew messages for blocks whose percentage is outside the range, or
        a single "no content" message when the total word count is zero.
    """
    if appeal_type == "levy":
        ranges = WEIGHT_RANGES_LEVY
    else:
        ranges = WEIGHT_RANGES_LICENSING

    total_words = sum(entry["word_count"] for entry in blocks)
    if total_words == 0:
        return ["אין תוכן בהחלטה"]

    findings = []
    for entry in blocks:
        block_id = entry["block_id"]
        # Blocks without a configured range, or without any words, are not weighed.
        if block_id not in ranges or not entry["word_count"] > 0:
            continue

        share = entry["word_count"] / total_words * 100
        low, high = ranges[block_id]
        if share < low:
            findings.append(f"בלוק {block_id} ({entry['title']}): משקל {share:.1f}% — מתחת לטווח ({low}-{high}%)")
        elif share > high:
            findings.append(f"בלוק {block_id} ({entry['title']}): משקל {share:.1f}% — מעל לטווח ({low}-{high}%)")

    return findings
|
||||
|
||||
|
||||
def check_structural_integrity(blocks: list[dict]) -> list[str]:
    """Check that required blocks exist and that the discussion block is the largest.

    Args:
        blocks: Block records with ``block_id``, ``title`` and ``word_count``.

    Returns:
        Hebrew issue messages: one per required block that is absent or has
        zero words, plus one if a non-header block has strictly more words
        than the discussion block (block-yod).
    """
    issues = []
    required = ["block-he", "block-zayin", "block-yod"]
    # A block with zero words counts as missing.
    block_ids = {b["block_id"] for b in blocks if b["word_count"] > 0}

    issues.extend(f"בלוק חובה חסר: {req}" for req in required if req not in block_ids)

    # Check discussion is the heaviest block; the first four blocks are
    # excluded from the comparison.
    header_ids = ("block-alef", "block-bet", "block-gimel", "block-dalet")
    yod = next((b for b in blocks if b["block_id"] == "block-yod"), None)
    if yod:
        max_block = max(
            (b for b in blocks if b["block_id"] not in header_ids),
            key=lambda x: x["word_count"],
            default=None,
        )
        # max() returns the first block on a tie, so compare word counts
        # explicitly: an equally long block is not "larger" than the discussion.
        if (
            max_block
            and max_block["block_id"] != "block-yod"
            and max_block["word_count"] > yod["word_count"]
        ):
            issues.append(f"בלוק הדיון (י) אינו הבלוק הגדול ביותר — {max_block['title']} ({max_block['word_count']} מילים) גדול יותר")

    return issues
|
||||
|
||||
|
||||
def check_no_duplication(vav_content: str, yod_content: str) -> list[str]:
    """Report background sentences that reappear verbatim in the discussion.

    The background text is split on ``.``, ``!`` and ``?``; fragments longer
    than 30 characters (after stripping) are looked up as substrings of the
    discussion text.

    Args:
        vav_content: Text of the background block (block-vav).
        yod_content: Text of the discussion block (block-yod).

    Returns:
        One Hebrew message per repeated fragment, quoting its first 60
        characters; empty if either text is empty.
    """
    if not (vav_content and yod_content):
        return []

    findings = []
    for fragment in re.split(r'[.!?]', vav_content):
        sentence = fragment.strip()
        # Short fragments are skipped.
        if len(sentence) > 30 and sentence in yod_content:
            findings.append(f"כפילות: משפט מהרקע חוזר בדיון — \"{sentence[:60]}...\"")

    return findings
|
||||
|
||||
|
||||
def _print_check_result(issues: list[str], failure_header: str, success_line: str) -> None:
    """Print one validation section: the issues if any, otherwise the success line.

    ``failure_header`` is a ``str.format`` template that receives ``count``.
    """
    if not issues:
        print(success_line)
        return
    print(failure_header.format(count=len(issues)))
    for issue in issues:
        print(f" • {issue}")


async def main():
    """Validate the decision of the case given on the command line and print a report.

    Loads the case, its decision, the decision blocks and the claim count
    from the database, then runs the neutrality, weight, structure and
    duplication checks and prints one section per check plus a summary.

    Exits with status 1 when the case-number argument is missing, the case is
    not found, or the case has no decision. Issues found by the checks are
    only printed; they do not change the exit status.
    """
    if len(sys.argv) < 2:
        print("שימוש: python validate-decision.py <מספר_תיק>")
        sys.exit(1)

    case_number = sys.argv[1]
    await init_schema()
    pool = await get_pool()

    # try/finally so the pool is also closed on the early sys.exit() paths
    # and when a query raises.
    try:
        async with pool.acquire() as conn:
            case = await conn.fetchrow(
                "SELECT * FROM cases WHERE case_number = $1", case_number
            )
            if not case:
                print(f"תיק {case_number} לא נמצא")
                sys.exit(1)

            decision = await conn.fetchrow(
                "SELECT * FROM decisions WHERE case_id = $1",
                case["id"],
            )
            if not decision:
                print(f"אין החלטה לתיק {case_number}")
                sys.exit(1)

            blocks = await conn.fetch(
                """SELECT block_id, title, content, word_count, weight_percent
                FROM decision_blocks WHERE decision_id = $1
                ORDER BY block_index""",
                decision["id"],
            )
            blocks = [dict(b) for b in blocks]

            claims_count = await conn.fetchval(
                "SELECT count(*) FROM claims WHERE case_id = $1", case["id"]
            )
    finally:
        await close_pool()

    # Determine appeal type from the leading digit of the case number (the
    # part before any "/", "+" or "-").
    num = case_number.split("/")[0].split("+")[0].split("-")[0]
    if num.startswith("8"):
        appeal_type = "levy"
        appeal_type_heb = "היטל השבחה"
    elif num.startswith("9"):
        # NOTE(review): check_weight_compliance() has no dedicated table for
        # "compensation" and falls back to the licensing ranges — confirm.
        appeal_type = "compensation"
        appeal_type_heb = "פיצויים"
    else:
        appeal_type = "licensing"
        appeal_type_heb = "רישוי ובנייה"

    separator = "=" * 60
    print(separator)
    print(f"ולידציה: {case_number} — {case['title']}")
    print(f"סוג: {appeal_type_heb} | מילים: {decision['total_words']} | טענות: {claims_count}")
    print(separator)

    all_issues = []
    vav = next((b for b in blocks if b["block_id"] == "block-vav"), None)
    yod = next((b for b in blocks if b["block_id"] == "block-yod"), None)

    # 1. Neutral background
    issues = check_neutral_background(vav["content"] if vav else "")
    _print_check_result(issues, "\n❌ רקע ניטרלי — {count} בעיות:", "\n✅ רקע ניטרלי — תקין")
    all_issues.extend(issues)

    # 2. Weight compliance
    issues = check_weight_compliance(blocks, appeal_type)
    _print_check_result(issues, "\n⚠ משקלות — {count} חריגות:", "\n✅ משקלות — בטווח")
    all_issues.extend(issues)

    # 3. Structural integrity
    issues = check_structural_integrity(blocks)
    _print_check_result(issues, "\n❌ מבנה — {count} בעיות:", "\n✅ מבנה — תקין")
    all_issues.extend(issues)

    # 4. No duplication
    issues = check_no_duplication(
        vav["content"] if vav else "",
        yod["content"] if yod else "",
    )
    _print_check_result(issues, "\n⚠ כפילויות — {count} נמצאו:", "\n✅ ללא כפילויות — תקין")
    all_issues.extend(issues)

    # Summary
    print(f"\n{separator}")
    if all_issues:
        print(f"סה\"כ: {len(all_issues)} בעיות נמצאו")
    else:
        print("✅ ההחלטה עומדת בכל הכללים")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user