Includes: - docs/: architecture, block-schema, migration-plan, product-specification - scripts/: bidi_table, decompose-decisions, extract-claims, seed-knowledge, etc. - skill-legal-decision/: SKILL.md + references + block-schema - skill-legal-assistant/: SKILL.md - skill-legal-docx/: SKILL.md + references - .claude/commands/: bidi-table skill - .taskmaster/: task config + PRDs - .gitignore: exclude legacy/, kiryat-yearim/, node_modules/, memory/ Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
135 lines
4.6 KiB
Python
135 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract case law citations from block-yod and link to case_law table."""
|
|
|
|
import asyncio
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from uuid import UUID
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
|
|
|
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
|
|
|
# Compiled regular expressions for Israeli case-law citations.
# Order matters to callers that de-duplicate: earlier patterns are tried first.
CITATION_PATTERNS = [
    re.compile(expression)
    for expression in (
        # One of the listed court-proceeding abbreviations (group 1)
        # followed by a docket number made of digits, "/" and "-" (group 2).
        r'(עע"מ|בג"ץ|ע"א|בר"ם|עת"מ|עמ"נ|ע"ע|רע"א|דנ"א|בש"א)\s*(\d[\d/\-]+)',
        # The "ערר" keyword with an optional district name, optionally wrapped
        # in parentheses, then the docket number (group 1).
        r'ערר\s*\(?\s*(?:מרכז|ירושלים|חי\'?|ת"א|דרום|צפון)?\s*\)?\s*(\d[\d/\-]+)',
        # The "ערר" keyword followed directly by a number shaped like
        # 3-5 digits, a "-" or "/" separator, then 2-4 digits (group 1).
        r'ערר\s+(\d{3,5}[\-/]\d{2,4})',
    )
]
|
|
|
|
|
|
def extract_citations_from_text(
    text: str,
    *,
    patterns=None,
    context_before: int = 50,
    context_after: int = 100,
) -> list[dict]:
    """Find all case law citations in text.

    Every pattern is scanned over the whole text, in order. A citation whose
    exact matched string was already reported (by this or an earlier pattern)
    is skipped, so each distinct citation string appears once, with the
    context of its first occurrence.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``[]``.
        patterns: Iterable of compiled regular expressions to apply. Defaults
            to the module-level ``CITATION_PATTERNS``.
        context_before: Number of characters kept before the match
            (non-negative).
        context_after: Number of characters kept after the match
            (non-negative).

    Returns:
        A list of dicts with two keys: ``"citation_text"`` (the full regex
        match) and ``"context"`` (the surrounding text with newlines replaced
        by spaces).
    """
    # Nothing to scan; also avoids a TypeError from finditer(None).
    if not text:
        return []
    if patterns is None:
        patterns = CITATION_PATTERNS

    citations = []
    seen = set()

    for pattern in patterns:
        for match in pattern.finditer(text):
            full_match = match.group(0)
            if full_match in seen:
                continue
            seen.add(full_match)

            # Clamp the context window to the bounds of the text.
            start = max(0, match.start() - context_before)
            end = min(len(text), match.end() + context_after)
            context = text[start:end].replace("\n", " ")

            citations.append({
                "citation_text": full_match,
                "context": context,
            })

    return citations
|
|
|
|
|
|
def _citation_number(citation_text: str) -> str:
    """Return the trailing docket number of a citation (e.g. "3975/22"), or ""."""
    match = re.search(r"\d[\d/\-]*$", citation_text)
    if match is None:
        return ""
    # The citation regexes may swallow a dangling separator; drop it.
    return match.group(0).strip("/-")


def _find_case_law_id(citation_text: str, case_law_map: dict):
    """Return the case_law id matching a citation, or None when there is none.

    Exact lookups (full citation text, then bare docket number) are tried
    first. Otherwise the docket number is searched inside each key, but only
    where it is not glued to further digits or separators, so that "975/22"
    does not match "3975/22".
    """
    number = _citation_number(citation_text)
    if not number:
        return None

    for candidate in (citation_text, number):
        if candidate in case_law_map:
            return case_law_map[candidate]

    bounded = re.compile(r"(?<![\d/\-])" + re.escape(number) + r"(?![\d/\-])")
    for key, cl_id in case_law_map.items():
        if bounded.search(key):
            return cl_id
    return None


async def main():
    """Link citations found in 'block-yod' decision blocks to case_law rows.

    Steps:
      1. Initialise the schema and obtain the connection pool.
      2. Load every non-empty 'block-yod' block together with its decision id
         and case number, and index the case_law table by case number.
      3. Extract citations from each block and, for each citation that matches
         a case_law row, insert a case_law_citations row unless one already
         exists for that (case_law_id, decision_id) pair.
      4. Print per-citation progress and a final summary.

    The pool is closed even when a query fails.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            # Get all block-yod content with decision info
            blocks = await conn.fetch(
                """SELECT db.content, d.id as decision_id, c.case_number
                FROM decision_blocks db
                JOIN decisions d ON d.id = db.decision_id
                JOIN cases c ON c.id = d.case_id
                WHERE db.block_id = 'block-yod' AND db.word_count > 0
                ORDER BY c.case_number"""
            )

            # Get existing case_law for matching
            case_laws = await conn.fetch("SELECT id, case_number, case_name FROM case_law")
            case_law_map = {}
            for cl in case_laws:
                number = cl["case_number"]
                if not number:
                    # A row without a case number cannot be matched by number.
                    continue
                # Index by the full case number ...
                case_law_map[number] = cl["id"]
                # ... and by its last whitespace-separated token
                # (e.g. "3975/22" from a prefixed case number).
                parts = number.split()
                if len(parts) > 1:
                    case_law_map[parts[-1]] = cl["id"]

            total_citations = 0
            total_linked = 0

            for block in blocks:
                case_number = block["case_number"]
                decision_id = block["decision_id"]
                # Treat NULL content as empty text.
                text = block["content"] or ""

                citations = extract_citations_from_text(text)
                if not citations:
                    continue

                print(f"\n{case_number}: {len(citations)} ציטוטים נמצאו")

                for cit in citations:
                    total_citations += 1

                    # Try to match to case_law table
                    case_law_id = _find_case_law_id(cit["citation_text"], case_law_map)
                    if case_law_id is None:
                        print(f" ⬜ {cit['citation_text'][:40]} — לא נמצא ב-DB")
                        continue

                    # Skip the insert when this decision already cites the case.
                    existing = await conn.fetchval(
                        """SELECT id FROM case_law_citations
                        WHERE case_law_id = $1 AND decision_id = $2""",
                        case_law_id, decision_id,
                    )
                    if existing is None:
                        await conn.execute(
                            """INSERT INTO case_law_citations
                            (case_law_id, decision_id, citation_type, context_text)
                            VALUES ($1, $2, 'support', $3)""",
                            case_law_id, decision_id, cit["context"],
                        )
                        total_linked += 1
                        print(f" ✅ {cit['citation_text'][:40]} → קושר לפסיקה")

            # Summary
            total_in_db = await conn.fetchval("SELECT count(*) FROM case_law_citations")
    finally:
        await close_pool()

    print(f"\n{'='*50}")
    print(f"סה\"כ ציטוטים שנמצאו: {total_citations}")
    print(f"סה\"כ קושרו לפסיקה ב-DB: {total_linked}")
    print(f"סה\"כ ב-case_law_citations: {total_in_db}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, not on import.
    asyncio.run(main())
|