# legal-ai/scripts/validate-decision.py

#!/usr/bin/env python3
"""Validate a decision against block-schema rules.

Usage: python validate-decision.py <case_number>

Checks:
1. Neutral background (block-vav) — no party quotes or value words
2. Weight compliance — blocks within expected ranges
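3. Structural integrity — all required blocks present and the discussion
   (block-yod) is the largest block
4. No duplication — background (block-vav) sentences are not repeated
   verbatim in the discussion (block-yod)

The report header also prints how many claims are recorded for the case;
claim coverage itself is not checked.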
"""

import asyncio
import json
import re
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent / "mcp-server" / "src"))

from legal_mcp.services.db import get_pool, init_schema, close_pool


# Value/judgment words that shouldn't appear in the neutral background
# (block-vav). check_neutral_background() tests each entry with a plain
# substring match against every line, so an entry also matches when it occurs
# inside a longer word or an inflected form.
VALUE_WORDS = [
    "חריג", "חטא", "בעייתי", "מזעזע", "שערורייתי", "מגוחך",
    "נפשע", "פגום", "חמור", "מקומם", "בלתי סביר", "מופרז",
    "מגונה", "פסול", "נלוז", "מטריד",
]

# Party quote indicators: regex patterns (applied with re.search, line by
# line) for phrases that attribute a statement to a party, e.g. "according to
# the claim of ...", "according to ...", "the appellant claims", "the
# respondent claims", "according to the approach of ...".
# NOTE(review): the alternation stem `העוררי` only matches forms that continue
# with a yod (such as the plural); the singular/feminine forms after these
# prepositions are not matched — confirm whether that gap is intended.
QUOTE_INDICATORS = [
    r"לטענת\s+(העוררי|המשיב|מבקשי)",
    r"לדברי\s+(העוררי|המשיב|מבקשי)",
    r"העורר\s+טוען",
    r"המשיבה\s+טוענת",
    r"לשיטת\s+(העוררי|המשיב)",
]

# Expected weight ranges per block type for licensing appeals.
# Each value is a (low, high) pair expressed in percent of the decision's
# total word count. check_weight_compliance() also falls back to this table
# for every appeal type other than "levy".
WEIGHT_RANGES_LICENSING = {
    "block-he": (0.5, 5),
    "block-vav": (3, 40),
    "block-zayin": (13, 40),
    "block-chet": (0, 15),
    "block-tet": (0, 15),
    "block-yod": (30, 75),
    "block-yod-alef": (1, 10),
    "block-yod-bet": (0, 2),
}

# Expected weight ranges for betterment-levy appeals, in the same
# (low, high) percent-of-total-words format. Selected when the appeal type
# is "levy".
WEIGHT_RANGES_LEVY = {
    "block-he": (0, 5),
    "block-vav": (2, 20),
    "block-zayin": (15, 40),
    "block-chet": (0, 25),
    "block-tet": (0, 15),
    "block-yod": (25, 75),
    "block-yod-alef": (1, 10),
    "block-yod-bet": (0, 3),
}


def check_neutral_background(content: str) -> list[str]:
    """Scan the background text (block-vav) for neutrality violations.

    Every line is checked for each entry of VALUE_WORDS (substring match) and
    for each regex in QUOTE_INDICATORS. One message is produced per
    (line, word) and per (line, pattern) hit; within a line the value-word
    messages come before the quote messages.

    Args:
        content: Background text; falsy content yields no findings.

    Returns:
        Hebrew finding messages carrying the 1-based line number, the
        offending word or matched phrase, and the first 80 characters of
        the line.
    """
    findings = []
    if not content:
        return findings

    for line_no, text in enumerate(content.split("\n"), start=1):
        snippet = text[:80]

        # Judgment vocabulary found anywhere in the line.
        findings.extend(
            f"מילת שיפוט ברקע (שורה {line_no}): \"{word}\" — \"{snippet}...\""
            for word in VALUE_WORDS
            if word in text
        )

        # Phrases attributing a statement to one of the parties.
        for pattern in QUOTE_INDICATORS:
            hit = re.search(pattern, text)
            if hit is None:
                continue
            findings.append(
                f"ציטוט מצד ברקע (שורה {line_no}): \"{hit.group()}\" — \"{snippet}...\""
            )

    return findings


def check_weight_compliance(blocks: list[dict], appeal_type: str,
                            ranges: "dict[str, tuple[float, float]] | None" = None) -> list[str]:
    """Check that each block's share of the total word count is within range.

    Args:
        blocks: Dicts with "block_id", "title" and "word_count" keys. A
            word_count of None (e.g. a NULL database column) counts as 0.
        appeal_type: "levy" selects WEIGHT_RANGES_LEVY; any other value
            (including "compensation") selects WEIGHT_RANGES_LICENSING.
        ranges: Optional override mapping block_id -> (low, high) in percent.
            When given, it is used instead of the table implied by
            appeal_type.

    Returns:
        One Hebrew message per out-of-range block, in input order, or a
        single "no content" message when the total word count is zero.
        Blocks without words, and blocks with no configured range, are
        skipped.
    """
    # Normalise once so a None word_count cannot break sum() or comparisons.
    counts = [block["word_count"] or 0 for block in blocks]
    total_words = sum(counts)
    if total_words == 0:
        return ["אין תוכן בהחלטה"]

    if ranges is None:
        ranges = WEIGHT_RANGES_LEVY if appeal_type == "levy" else WEIGHT_RANGES_LICENSING

    issues = []
    for block, count in zip(blocks, counts):
        bid = block["block_id"]
        if count <= 0 or bid not in ranges:
            continue

        weight = count / total_words * 100
        low, high = ranges[bid]
        if weight < low:
            issues.append(f"בלוק {bid} ({block['title']}): משקל {weight:.1f}% — מתחת לטווח ({low}-{high}%)")
        elif weight > high:
            issues.append(f"בלוק {bid} ({block['title']}): משקל {weight:.1f}% — מעל לטווח ({low}-{high}%)")

    return issues


def check_structural_integrity(blocks: list[dict]) -> list[str]:
    """Check that required blocks exist and the discussion block dominates.

    Args:
        blocks: Dicts with "block_id", "title" and "word_count" keys. A
            word_count of None counts as 0 (i.e. the block is treated as
            empty).

    Returns:
        Hebrew messages: one per required block (block-he, block-zayin,
        block-yod) that is absent or empty, plus one if some other block is
        strictly larger than block-yod. Blocks alef through dalet are left
        out of the size comparison.
    """
    required = ("block-he", "block-zayin", "block-yod")
    # Excluded from the "largest block" comparison — presumably front-matter
    # blocks; confirm against the block schema.
    excluded_from_size_check = ("block-alef", "block-bet", "block-gimel", "block-dalet")

    def words(block: dict) -> int:
        # A None word_count must not break comparisons.
        return block["word_count"] or 0

    present = {b["block_id"] for b in blocks if words(b) > 0}
    issues = [f"בלוק חובה חסר: {req}" for req in required if req not in present]

    # Check discussion is the heaviest block
    yod = next((b for b in blocks if b["block_id"] == "block-yod"), None)
    if yod is not None:
        largest = max(
            (b for b in blocks if b["block_id"] not in excluded_from_size_check),
            key=words,
            default=None,
        )
        # max() returns the first maximal element, so a block that merely
        # ties with block-yod can be picked; only report a strictly larger one.
        if (
            largest is not None
            and largest["block_id"] != "block-yod"
            and words(largest) > words(yod)
        ):
            issues.append(f"בלוק הדיון (י) אינו הבלוק הגדול ביותר — {largest['title']} ({largest['word_count']} מילים) גדול יותר")

    return issues


def check_no_duplication(vav_content: str, yod_content: str) -> list[str]:
    """Report background sentences that reappear verbatim in the discussion.

    The background (block-vav) text is split on '.', '!' and '?'. Each
    stripped fragment longer than 30 characters is looked up as a substring
    of the discussion (block-yod) text.

    Args:
        vav_content: Background text.
        yod_content: Discussion text.

    Returns:
        One Hebrew message per repeated fragment, quoting its first 60
        characters. Empty when either text is falsy.
    """
    if not (vav_content and yod_content):
        return []

    fragments = (piece.strip() for piece in re.split(r'[.!?]', vav_content))
    return [
        f"כפילות: משפט מהרקע חוזר בדיון — \"{fragment[:60]}...\""
        for fragment in fragments
        if len(fragment) > 30 and fragment in yod_content
    ]


def _classify_appeal(case_number: str) -> tuple[str, str]:
    """Return (appeal_type, Hebrew label) from the case number's leading digit."""
    num = case_number.split("/")[0].split("+")[0].split("-")[0]
    if num.startswith("8"):
        return "levy", "היטל השבחה"
    if num.startswith("9"):
        return "compensation", "פיצויים"
    return "licensing", "רישוי ובנייה"


def _print_section(issues: list[str], failure_header: str, success_header: str) -> None:
    """Print one check's result: the success line, or the header plus a bullet per issue."""
    if not issues:
        print(success_header)
        return
    print(failure_header)
    for issue in issues:
        print(f"  • {issue}")


async def main():
    """Load a case's decision from the database and print a validation report.

    Reads the case number from sys.argv[1], fetches the case, its decision,
    the decision blocks (ordered by block_index) and the claim count, then
    runs the four checks and prints a section per check plus a summary.

    Exits with status 1 when the argument is missing, the case is not found,
    or the case has no decision. Findings themselves do not change the exit
    status.
    """
    if len(sys.argv) < 2:
        print("שימוש: python validate-decision.py <מספר_תיק>")
        sys.exit(1)

    case_number = sys.argv[1]
    await init_schema()
    pool = await get_pool()

    # try/finally so the pool is also closed on the early sys.exit() paths
    # and when a query raises.
    try:
        async with pool.acquire() as conn:
            case = await conn.fetchrow(
                "SELECT * FROM cases WHERE case_number = $1", case_number
            )
            if not case:
                print(f"תיק {case_number} לא נמצא")
                sys.exit(1)

            decision = await conn.fetchrow(
                "SELECT * FROM decisions WHERE case_id = $1",
                case["id"],
            )
            if not decision:
                print(f"אין החלטה לתיק {case_number}")
                sys.exit(1)

            rows = await conn.fetch(
                """SELECT block_id, title, content, word_count, weight_percent
                   FROM decision_blocks WHERE decision_id = $1
                   ORDER BY block_index""",
                decision["id"],
            )
            blocks = [dict(row) for row in rows]

            claims_count = await conn.fetchval(
                "SELECT count(*) FROM claims WHERE case_id = $1", case["id"]
            )
    finally:
        await close_pool()

    # Determine appeal type
    appeal_type, appeal_type_heb = _classify_appeal(case_number)

    print(f"{'='*60}")
    print(f"ולידציה: {case_number} — {case['title']}")
    print(f"סוג: {appeal_type_heb} | מילים: {decision['total_words']} | טענות: {claims_count}")
    print(f"{'='*60}")

    all_issues = []
    vav = next((b for b in blocks if b["block_id"] == "block-vav"), None)
    yod = next((b for b in blocks if b["block_id"] == "block-yod"), None)

    # 1. Neutral background
    issues = check_neutral_background(vav["content"] if vav else "")
    _print_section(issues, f"\n❌ רקע ניטרלי — {len(issues)} בעיות:", "\n✅ רקע ניטרלי — תקין")
    all_issues.extend(issues)

    # 2. Weight compliance
    issues = check_weight_compliance(blocks, appeal_type)
    _print_section(issues, f"\n⚠ משקלות — {len(issues)} חריגות:", "\n✅ משקלות — בטווח")
    all_issues.extend(issues)

    # 3. Structural integrity
    issues = check_structural_integrity(blocks)
    _print_section(issues, f"\n❌ מבנה — {len(issues)} בעיות:", "\n✅ מבנה — תקין")
    all_issues.extend(issues)

    # 4. No duplication
    issues = check_no_duplication(
        vav["content"] if vav else "",
        yod["content"] if yod else "",
    )
    _print_section(issues, f"\n⚠ כפילויות — {len(issues)} נמצאו:", "\n✅ ללא כפילויות — תקין")
    all_issues.extend(issues)

    # Summary
    print(f"\n{'='*60}")
    if all_issues:
        print(f"סה\"כ: {len(all_issues)} בעיות נמצאו")
    else:
        print("✅ ההחלטה עומדת בכל הכללים")

# Script entry point: run the async validator to completion when the file is
# executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())