From 38a61712bc894b7270802d9d338437248f73195a Mon Sep 17 00:00:00 2001 From: Chaim Date: Fri, 3 Apr 2026 10:50:56 +0000 Subject: [PATCH] =?UTF-8?q?Fix=20plan=20regex:=20require=20numeric=20ident?= =?UTF-8?q?ifier=20after=20=D7=AA=D7=91"=D7=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously matched any word after תב"ע (e.g., "תב"ע ואין", "תב"ע קיפחה"). Now requires a plan number (digits/hyphens) — reduces false positives from 24 to 4 on the Hecht case test. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/legal_mcp/services/references_extractor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/references_extractor.py b/mcp-server/src/legal_mcp/services/references_extractor.py index 4f35f3f..f4b0bd2 100644 --- a/mcp-server/src/legal_mcp/services/references_extractor.py +++ b/mcp-server/src/legal_mcp/services/references_extractor.py @@ -23,12 +23,12 @@ logger = logging.getLogger(__name__) PLAN_PATTERNS = [ # תמ"א with number re.compile(r'תמ"א\s*[\-]?\s*(\d+)(?:\s*[\-/]\s*(\S+))?'), - # תכנית מתאר with identifiers + # תכנית מתאר with identifiers (must have a number) re.compile(r'תכנית\s+(?:מתאר\s+)?(?:ארצית|מחוזית|מקומית)?\s*(?:מס[\'"]?\s*)?(\d[\d/\-\.]+\S*)'), - # תב"ע with identifiers - re.compile(r'תב"ע\s*(?:מס[\'"]?\s*)?(\S+)'), - # Specific plan number patterns (e.g., 62/3, ירושלים 12345) - re.compile(r'תכנית\s+(\S+\s*\d[\d/\-\.]+\S*)'), + # תב"ע with plan number (must start with a digit) + re.compile(r'תב"ע\s+(?:מס[\'"]?\s*)?(\d[\d/\-\.]+\S*)'), + # Plan number pattern (e.g., 102-1170893, 62/3) + re.compile(r'תכנית\s+(?:מס[\'"]?\s*)?(\d[\d/\-\.]+\S*)'), ] # Case law (פסיקה)