Fix plan regex: require numeric identifier after תב"ע

Previously the pattern matched any word after תב"ע (e.g., "תב"ע ואין", "תב"ע קיפחה").
It now requires a plan number that starts with a digit (digits, slashes, hyphens, dots),
which reduces false positives from 24 to 4 on the Hecht case test.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-03 10:50:56 +00:00
parent d9e5ef0f46
commit 38a61712bc

View File

@@ -23,12 +23,12 @@ logger = logging.getLogger(__name__)
# Compiled regexes that recognise references to planning schemes in Hebrew text.
# Every pattern captures the plan identifier in group 1, and that identifier must
# begin with a digit, so ordinary words following a plan keyword are not captured.
PLAN_PATTERNS = [
    # National outline plan keyword (תמ"א) followed by a number; an optional
    # sub-identifier after a hyphen or slash is captured in group 2.
    re.compile(r'תמ"א\s*[\-]?\s*(\d+)(?:\s*[\-/]\s*(\S+))?'),
    # Outline plan keyword (תכנית, optionally מתאר and a national/district/local
    # qualifier, optionally the "no." abbreviation) followed by an identifier that
    # starts with a digit and continues with digits, slashes, hyphens or dots.
    re.compile(r'תכנית\s+(?:מתאר\s+)?(?:ארצית|מחוזית|מקומית)?\s*(?:מס[\'"]?\s*)?(\d[\d/\-\.]+\S*)'),
    # Zoning plan keyword (תב"ע) followed by whitespace and a plan number. The
    # number must start with a digit; a bare word after the keyword is rejected.
    re.compile(r'תב"ע\s+(?:מס[\'"]?\s*)?(\d[\d/\-\.]+\S*)'),
    # Plain plan keyword followed directly by a plan number (e.g., 102-1170893, 62/3).
    re.compile(r'תכנית\s+(?:מס[\'"]?\s*)?(\d[\d/\-\.]+\S*)'),
]
# Case law (פסיקה)