Clean up scripts/: archive 17, delete 5, add SCRIPTS.md registry
Active scripts (5): auto-sync-cases.sh, backup-db.sh, restore-db.sh, notify.py, bidi_table.py.
Archived (17): one-time migration/seeding scripts whose functionality is now in the MCP server or web API; moved to scripts/.archive/.
Deleted (5): zero-value scripts (duplicates, hardcoded single-case, debug scripts).
Added scripts/SCRIPTS.md — a registry of all scripts with purpose, status, and what superseded them.
CLAUDE.md updated with a rule: any script change requires a SCRIPTS.md update.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
323
scripts/.archive/decompose-decisions.py
Normal file
323
scripts/.archive/decompose-decisions.py
Normal file
@@ -0,0 +1,323 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Decompose 6 final decisions into 12-block structure.
|
||||
|
||||
Uses heuristic parsing based on known section headers in Dafna's decisions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))
|
||||
|
||||
from legal_mcp.services.db import get_pool, init_schema, close_pool
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Block definitions with detection patterns
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
# The 12 blocks of a decision, in document order.
# Each entry carries:
#   block_id        - stable identifier (Hebrew-letter based: alef .. yod-bet)
#   block_index     - 1-based position of the block in the decision
#   title           - Hebrew display title of the block
#   generation_type - label stored alongside the block in decision_blocks
BLOCKS = [
    {
        "block_id": "block-alef",
        "block_index": 1,
        "title": "כותרת מוסדית",
        "generation_type": "template-fill",
    },
    {
        "block_id": "block-bet",
        "block_index": 2,
        "title": "הרכב הוועדה",
        "generation_type": "template-fill",
    },
    {
        "block_id": "block-gimel",
        "block_index": 3,
        "title": "צדדים",
        "generation_type": "template-fill",
    },
    {
        "block_id": "block-dalet",
        "block_index": 4,
        "title": "כותרת החלטה",
        "generation_type": "template-fill",
    },
    {
        "block_id": "block-he",
        "block_index": 5,
        "title": "פתיחה",
        "generation_type": "paraphrase",
    },
    {
        "block_id": "block-vav",
        "block_index": 6,
        "title": "רקע עובדתי",
        "generation_type": "reproduction",
    },
    {
        "block_id": "block-zayin",
        "block_index": 7,
        "title": "טענות הצדדים",
        "generation_type": "paraphrase",
    },
    {
        "block_id": "block-chet",
        "block_index": 8,
        "title": "הליכים בפני ועדת הערר",
        "generation_type": "reproduction",
    },
    {
        "block_id": "block-tet",
        "block_index": 9,
        "title": "תכניות חלות",
        "generation_type": "guided-synthesis",
    },
    {
        "block_id": "block-yod",
        "block_index": 10,
        "title": "דיון והכרעה",
        "generation_type": "rhetorical-construction",
    },
    {
        "block_id": "block-yod-alef",
        "block_index": 11,
        "title": "סיכום",
        "generation_type": "paraphrase",
    },
    {
        "block_id": "block-yod-bet",
        "block_index": 12,
        "title": "חתימות",
        "generation_type": "template-fill",
    },
]
|
||||
|
||||
# Section header patterns (Hebrew)
|
||||
SECTION_PATTERNS = {
    "claims": re.compile(r"תמצית\s*טענות\s*הצדדים|טענות\s*הצדדים|טענות\s*העוררי"),
    "proceedings": re.compile(r"ההליכים\s*בפני\s*ועדת\s*הערר|הליכים\s*בפני\s*הוועדה|הדיון\s*בפני\s*ועדת\s*הערר"),
    "plans": re.compile(r"תכניות\s*חלות|המסגרת\s*התכנונית|הוראות\s*התכנית"),
    # NOTE(review): the bare alternatives match the first mention of either
    # word anywhere in the text (including inside the third "proceedings"
    # heading above), not only a heading line. Left as-is; consider anchoring.
    "discussion": re.compile(r"דיון\s*והכרעה|דיון|הכרעה"),
    "summary": re.compile(r"^סיכום$|^סוף\s*דבר$", re.MULTILINE),
    "appellant_claims": re.compile(r"טענות\s*העוררי|טענות\s*העורר"),
    "respondent_claims": re.compile(r"עמדת\s*הוועדה\s*המקומית|תגובת\s*המשיבה|עמדת\s*המשיב"),
    "permit_applicant": re.compile(r"עמדת\s*מבקש|עמדת\s*מגיש|עמדת\s*היזם"),
    "panel": re.compile(r"בפני[:\s]|יו\"ר"),
    "parties_vs": re.compile(r"\s*נגד\s*"),
    "decision_title": re.compile(r"^החלטה$", re.MULTILINE),
    # re.MULTILINE is required: without it "^" only matches at offset 0 of the
    # whole text, so an opening paragraph that follows the header could never
    # be located.
    "opening": re.compile(r"^לפנינו\s|^בפנינו\s", re.MULTILINE),
    "signature": re.compile(r"ניתנה?\s*(היום|פה\s*אחד|ביום)|חתימ"),
}

# Header length assumed when neither the opening nor the claims heading is found.
_FALLBACK_HEADER_CHARS = 500
# Upper bound on how much text after the opening marker is scanned for the opening block.
_OPENING_WINDOW_CHARS = 1000


def find_section_start(text: str, pattern: re.Pattern) -> int:
    """Return the character offset of the first match of *pattern* in *text*.

    Returns -1 when the pattern does not occur. Offset 0 is a valid match,
    so callers must test ``>= 0`` (not ``> 0``) to check for presence.
    """
    match = pattern.search(text)
    return match.start() if match else -1


def _first_after(start: int, *candidates: int, default: int) -> int:
    """Return the first candidate, in priority order, that lies after *start*.

    Candidates equal to -1 (not found) or at/before *start* are skipped;
    *default* is returned when none qualifies.
    """
    for candidate in candidates:
        if candidate > start:
            return candidate
    return default


def decompose_decision(text: str) -> list[dict]:
    """Split a decision's text into the 12 blocks using section-header heuristics.

    Section boundaries are the first match of each relevant entry of
    ``SECTION_PATTERNS``. A block whose marker is missing gets empty content;
    ``block-dalet`` always holds the literal decision title.

    Args:
        text: Full extracted text of the decision.

    Returns:
        A list of 12 dicts ``{"block_id": str, "content": str}`` in block
        order (block-alef .. block-yod-bet). Content is whitespace-stripped.
    """
    total_len = len(text)

    # Locate every section marker once; -1 means "not present".
    pos_claims = find_section_start(text, SECTION_PATTERNS["claims"])
    pos_proceedings = find_section_start(text, SECTION_PATTERNS["proceedings"])
    pos_plans = find_section_start(text, SECTION_PATTERNS["plans"])
    pos_discussion = find_section_start(text, SECTION_PATTERNS["discussion"])
    pos_summary = find_section_start(text, SECTION_PATTERNS["summary"])
    pos_signature = find_section_start(text, SECTION_PATTERNS["signature"])
    pos_opening = find_section_start(text, SECTION_PATTERNS["opening"])
    pos_decision_title = find_section_start(text, SECTION_PATTERNS["decision_title"])
    pos_panel = find_section_start(text, SECTION_PATTERNS["panel"])
    pos_parties = find_section_start(text, SECTION_PATTERNS["parties_vs"])

    blocks: list[dict] = []

    def add(block_id: str, content: str) -> None:
        blocks.append({"block_id": block_id, "content": content})

    # Blocks alef-dalet: header area, i.e. everything before the opening
    # paragraph (or before the claims heading when no opening is found).
    if pos_opening >= 0:
        header_end = pos_opening
    elif pos_claims >= 0:
        header_end = pos_claims
    else:
        header_end = _FALLBACK_HEADER_CHARS

    if 0 <= pos_panel < header_end:
        add("block-alef", text[:pos_panel].strip())
        # Require panel < parties < header_end so the slices below are never
        # reversed (which would silently yield empty or overlapping content).
        if pos_panel < pos_parties < header_end:
            add("block-bet", text[pos_panel:pos_parties].strip())
            if pos_parties < pos_decision_title < header_end:
                parties_end = pos_decision_title
            else:
                parties_end = header_end
            add("block-gimel", text[pos_parties:parties_end].strip())
        else:
            add("block-bet", text[pos_panel:header_end].strip())
            add("block-gimel", "")
    else:
        # Can't split the header: keep it whole in block-alef.
        add("block-alef", text[:header_end].strip())
        add("block-bet", "")
        add("block-gimel", "")
    add("block-dalet", "החלטה")

    # Block he (opening) and block vav (background).
    if pos_opening >= 0:
        opening_end = _first_after(pos_opening, pos_claims, pos_discussion, default=total_len)
        window_end = min(pos_opening + _OPENING_WINDOW_CHARS, opening_end)
        opening_text = text[pos_opening:window_end].strip()
        # The opening is usually short: cut at the second paragraph break,
        # ignoring breaks within the first 50 characters.
        para_breaks = [i for i, ch in enumerate(opening_text) if ch == "\n" and i > 50]
        if len(para_breaks) >= 2:
            opening_text = opening_text[:para_breaks[1]].strip()
        add("block-he", opening_text)

        if pos_claims > pos_opening:
            # Background is whatever lies between the opening and the claims heading.
            bg_start = pos_opening + len(opening_text)
            add("block-vav", text[bg_start:pos_claims].strip())
        else:
            add("block-vav", "")
    else:
        add("block-he", "")
        add("block-vav", "")

    # Block zayin: claims of the parties.
    if pos_claims >= 0:
        claims_end = _first_after(
            pos_claims, pos_proceedings, pos_discussion, pos_summary, default=total_len
        )
        add("block-zayin", text[pos_claims:claims_end].strip())
    else:
        add("block-zayin", "")

    # Block chet: proceedings before the committee (optional).
    if pos_proceedings >= 0:
        proc_end = _first_after(
            pos_proceedings, pos_plans, pos_discussion, pos_summary, default=total_len
        )
        add("block-chet", text[pos_proceedings:proc_end].strip())
    else:
        add("block-chet", "")

    # Block tet: applicable plans (optional); only when it precedes the discussion.
    plans_limit = pos_discussion if pos_discussion >= 0 else total_len
    if 0 <= pos_plans < plans_limit:
        plans_end = _first_after(pos_plans, pos_discussion, pos_summary, default=total_len)
        add("block-tet", text[pos_plans:plans_end].strip())
    else:
        add("block-tet", "")

    # Block yod: discussion and ruling.
    if pos_discussion >= 0:
        disc_end = _first_after(pos_discussion, pos_summary, pos_signature, default=total_len)
        add("block-yod", text[pos_discussion:disc_end].strip())
    else:
        add("block-yod", "")

    # Block yod-alef: summary.
    if pos_summary >= 0:
        summ_end = _first_after(pos_summary, pos_signature, default=total_len)
        add("block-yod-alef", text[pos_summary:summ_end].strip())
    else:
        add("block-yod-alef", "")

    # Block yod-bet: signatures, running to the end of the text.
    if pos_signature >= 0:
        add("block-yod-bet", text[pos_signature:].strip())
    else:
        add("block-yod-bet", "")

    return blocks
|
||||
|
||||
|
||||
async def main():
    """Decompose every final decision into blocks and store them in the DB.

    For each decision with status 'final' that has a 'decision' document,
    the extracted text is split with ``decompose_decision``, merged with the
    metadata in ``BLOCKS``, printed as a per-block summary, and written to
    ``decision_blocks`` (replacing any rows already stored for that
    decision). ``decisions.total_paragraphs`` is updated from the discussion
    block. Decisions without extracted text are skipped.

    The connection pool is always closed, even when processing fails.
    """
    await init_schema()
    pool = await get_pool()

    try:
        async with pool.acquire() as conn:
            decisions = await conn.fetch(
                """SELECT d.id as decision_id, c.case_number, c.title, d.total_words,
                          doc.extracted_text
                   FROM decisions d
                   JOIN cases c ON c.id = d.case_id
                   JOIN documents doc ON doc.case_id = d.case_id AND doc.doc_type = 'decision'
                   WHERE d.status = 'final'
                   ORDER BY c.case_number"""
            )

        for dec in decisions:
            decision_id = dec["decision_id"]
            case_number = dec["case_number"]
            text = dec["extracted_text"]

            # A NULL/blank extraction would otherwise crash on .split() or
            # overwrite stored blocks with empty ones.
            if not (text and text.strip()):
                print(f"⚠️ דילוג: {case_number} — אין טקסט מחולץ")
                continue

            total_words = len(text.split())

            print(f"\n{'='*60}")
            print(f"מפרק: {case_number} — {dec['title']}")
            print(f"{'='*60}")

            # Decompose, then merge the parsed content with the block metadata.
            content_by_id = {b["block_id"]: b["content"] for b in decompose_decision(text)}

            block_data = []
            for block_def in BLOCKS:
                content = content_by_id.get(block_def["block_id"], "")
                word_count = len(content.split()) if content else 0
                weight = round((word_count / total_words * 100), 2) if total_words > 0 and word_count > 0 else 0

                block_data.append({
                    **block_def,
                    "content": content,
                    "word_count": word_count,
                    "weight_percent": weight,
                    "status": "final" if content else "empty",
                })

            # Print summary
            for b in block_data:
                status = "✅" if b["word_count"] > 0 else "⬜"
                print(f" {status} {b['block_id']:18s} | {b['title']:25s} | {b['word_count']:5d} מילים | {b['weight_percent']:5.1f}%")

            # Store in DB. Delete + inserts + update run in one transaction so
            # a mid-way failure cannot leave the decision without its blocks.
            # NOTE(review): conn.transaction() is the asyncpg API; the $n
            # placeholders and fetchval() usage suggest the pool is asyncpg —
            # confirm against get_pool().
            async with pool.acquire() as conn:
                async with conn.transaction():
                    # Delete existing blocks for this decision
                    await conn.execute(
                        "DELETE FROM decision_blocks WHERE decision_id = $1", decision_id
                    )

                    for b in block_data:
                        await conn.execute(
                            """INSERT INTO decision_blocks
                                   (decision_id, block_id, block_index, title, content,
                                    word_count, weight_percent, generation_type, status)
                               VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)""",
                            decision_id,
                            b["block_id"], b["block_index"], b["title"],
                            b["content"], b["word_count"], b["weight_percent"],
                            b["generation_type"], b["status"],
                        )

                    # Count paragraphs in the discussion block: non-blank
                    # lines longer than 20 characters.
                    discussion = next(b for b in block_data if b["block_id"] == "block-yod")
                    if discussion["content"]:
                        paragraph_count = sum(
                            1
                            for line in discussion["content"].split("\n")
                            if len(line.strip()) > 20
                        )
                        await conn.execute(
                            "UPDATE decisions SET total_paragraphs = $1 WHERE id = $2",
                            paragraph_count, decision_id,
                        )

        # Final summary
        async with pool.acquire() as conn:
            block_count = await conn.fetchval("SELECT count(*) FROM decision_blocks")
            non_empty = await conn.fetchval("SELECT count(*) FROM decision_blocks WHERE status = 'final'")
    finally:
        # Release the pool even when a query or the parsing step raised.
        await close_pool()

    print(f"\n{'='*60}")
    print(f"✅ סה\"כ בלוקים: {block_count} ({non_empty} עם תוכן)")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user