Add docs, scripts, skills, commands, and taskmaster config to repo
Includes:
- docs/: architecture, block-schema, migration-plan, product-specification
- scripts/: bidi_table, decompose-decisions, extract-claims, seed-knowledge, etc.
- skill-legal-decision/: SKILL.md + references + block-schema
- skill-legal-assistant/: SKILL.md
- skill-legal-docx/: SKILL.md + references
- .claude/commands/: bidi-table skill
- .taskmaster/: task config + PRDs
- .gitignore: exclude legacy/, kiryat-yearim/, node_modules/, memory/

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
134
scripts/extract-citations.py
Normal file
134
scripts/extract-citations.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract case law citations from block-yod and link to case_law table."""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
# Compiled regexes that recognise Israeli case-law citations in free text.
# In every pattern the last capture group holds the docket number.
CITATION_PATTERNS = [
    re.compile(source)
    for source in (
        # Proceeding-type abbreviation followed by a docket number.
        r'(עע"מ|בג"ץ|ע"א|בר"ם|עת"מ|עמ"נ|ע"ע|רע"א|דנ"א|בש"א)\s*(\d[\d/\-]+)',
        # "ערר" with an optional (possibly parenthesised) district, then a number.
        r'ערר\s*\(?\s*(?:מרכז|ירושלים|חי\'?|ת"א|דרום|צפון)?\s*\)?\s*(\d[\d/\-]+)',
        # "ערר" followed directly by a number/year style docket, no district.
        r'ערר\s+(\d{3,5}[\-/]\d{2,4})',
    )
]
|
||||
|
||||
|
||||
def extract_citations_from_text(text: str, patterns=None) -> list[dict]:
    """Find all case law citations in text.

    Args:
        text: The text to scan. ``None`` or an empty string yields no results.
        patterns: Optional iterable of compiled regexes to search with.
            Defaults to the module-level ``CITATION_PATTERNS``.

    Returns:
        One dict per distinct citation, in the order found (pattern by
        pattern), with keys:
          - ``citation_text``: the matched citation, with whitespace runs
            collapsed to single spaces and trailing ``/`` or ``-`` removed.
          - ``context``: the surrounding text (up to 50 characters before
            the match and 100 after it) with newlines replaced by spaces.
    """
    if not text:
        return []
    if patterns is None:
        patterns = CITATION_PATTERNS

    citations = []
    seen = set()

    for pattern in patterns:
        for match in pattern.finditer(text):
            # Normalise before de-duplicating: the number part of the regexes
            # is greedy over "/" and "-", so a citation followed by a dash
            # (or broken across a line) would otherwise count as a new one.
            citation_text = " ".join(match.group(0).split()).rstrip("/-")
            if not citation_text or citation_text in seen:
                continue
            seen.add(citation_text)

            # Context window: 50 characters before the match, 100 after.
            start = max(0, match.start() - 50)
            end = min(len(text), match.end() + 100)
            context = text[start:end].replace("\n", " ")

            citations.append({
                "citation_text": citation_text,
                "context": context,
            })

    return citations
|
||||
|
||||
|
||||
def _normalize_reference(value) -> str:
    """Collapse whitespace runs and drop trailing '/' or '-' separators."""
    return " ".join((value or "").split()).rstrip("/-")


def _bounded(fragment: str) -> re.Pattern:
    """Compile a regex that finds *fragment* only as a whole docket token."""
    # The look-arounds reject hits glued to further digits or separators, so
    # "975/22" no longer matches inside "3975/22".
    return re.compile(rf"(?<![\d/\-]){re.escape(fragment)}(?![\d/\-])")


def _build_case_law_index(rows) -> list:
    """Return (key, key_pattern, case_law_id) entries for every usable key."""
    keyed = {}
    for row in rows:
        case_number = _normalize_reference(row["case_number"])
        if not case_number:
            # An empty key would be a substring of every citation.
            continue
        keyed[case_number] = row["id"]
        # Also index by the short number (last whitespace-separated token).
        parts = case_number.split()
        if len(parts) > 1:
            keyed[parts[-1]] = row["id"]
    return [(key, _bounded(key), cl_id) for key, cl_id in keyed.items()]


def _match_case_law(citation_text: str, index: list):
    """Return the id of the first indexed case matching the citation, else None."""
    needle = _normalize_reference(citation_text)
    if not needle:
        return None
    needle_pattern = _bounded(needle)
    for key, key_pattern, cl_id in index:
        # Either the key appears inside the citation or the citation appears
        # inside the key, but only on whole-token boundaries.
        if key_pattern.search(needle) or needle_pattern.search(key):
            return cl_id
    return None


async def main():
    """Extract citations from every 'block-yod' block and link them to case_law.

    Reads all non-empty 'block-yod' decision blocks, extracts case-law
    citations from their content, and for each citation that matches a row
    in ``case_law`` inserts a ``case_law_citations`` row (type 'support')
    unless that (case_law_id, decision_id) pair already exists. Progress and
    a final summary are printed to stdout. The pool is closed even when a
    step fails.
    """
    await init_schema()
    pool = await get_pool()

    total_citations = 0
    total_linked = 0

    try:
        async with pool.acquire() as conn:
            # Get all block-yod content with decision info
            blocks = await conn.fetch(
                """SELECT db.content, d.id as decision_id, c.case_number
                FROM decision_blocks db
                JOIN decisions d ON d.id = db.decision_id
                JOIN cases c ON c.id = d.case_id
                WHERE db.block_id = 'block-yod' AND db.word_count > 0
                ORDER BY c.case_number"""
            )

            # Get existing case_law for matching
            case_laws = await conn.fetch("SELECT id, case_number, case_name FROM case_law")

        case_law_index = _build_case_law_index(case_laws)

        for block in blocks:
            case_number = block["case_number"]
            decision_id = block["decision_id"]
            text = block["content"] or ""

            citations = extract_citations_from_text(text)
            if not citations:
                continue

            print(f"\n{case_number}: {len(citations)} ציטוטים נמצאו")

            async with pool.acquire() as conn:
                for cit in citations:
                    total_citations += 1

                    # Try to match to case_law table
                    case_law_id = _match_case_law(cit["citation_text"], case_law_index)
                    if case_law_id is None:
                        print(f" ⬜ {cit['citation_text'][:40]} — לא נמצא ב-DB")
                        continue

                    # Skip pairs that are already linked.
                    existing = await conn.fetchval(
                        """SELECT id FROM case_law_citations
                        WHERE case_law_id = $1 AND decision_id = $2""",
                        case_law_id, decision_id,
                    )
                    if not existing:
                        await conn.execute(
                            """INSERT INTO case_law_citations
                            (case_law_id, decision_id, citation_type, context_text)
                            VALUES ($1, $2, 'support', $3)""",
                            case_law_id, decision_id, cit["context"],
                        )
                        total_linked += 1
                        print(f" ✅ {cit['citation_text'][:40]} → קושר לפסיקה")

        # Summary
        async with pool.acquire() as conn:
            total_in_db = await conn.fetchval("SELECT count(*) FROM case_law_citations")
    finally:
        await close_pool()

    print(f"\n{'='*50}")
    print(f"סה\"כ ציטוטים שנמצאו: {total_citations}")
    print(f"סה\"כ קושרו לפסיקה ב-DB: {total_linked}")
    print(f"סה\"כ ב-case_law_citations: {total_in_db}")
|
||||
|
||||
|
||||
# Run the async pipeline only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user