Merge pull request 'fix(nevo): strip preamble/mini-ratio from court rulings too (#86.1)' (#56) from fix/nevo-preamble-court-rulings into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m35s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m35s
This commit was merged in pull request #56.
This commit is contained in:
@@ -358,8 +358,16 @@ def render_pages_for_multimodal(
|
|||||||
_NEVO_MARKERS = ("ספרות:", "חקיקה שאוזכרה:", "מיני-רציו:", "פסקי דין שאוזכרו:",
|
_NEVO_MARKERS = ("ספרות:", "חקיקה שאוזכרה:", "מיני-רציו:", "פסקי דין שאוזכרו:",
|
||||||
"כתבי עת:", "הועתק מנבו")
|
"כתבי עת:", "הועתק מנבו")
|
||||||
|
|
||||||
|
# Markers for where the actual decision body begins (everything before is Nevo
|
||||||
|
# preamble: bibliography + מיני-רציו). Two families:
|
||||||
|
# - ועדת ערר / district openings (בפנינו / הערר שבנדון / ...)
|
||||||
|
# - COURT-RULING openings (#86.1): a פסק-דין header or the authoring judge's
|
||||||
|
# line ("השופט/ת X:", "כב' השופט", "הנשיא"). Without these, Nevo court
|
||||||
|
# judgments — exactly the ones carrying a מיני-רציו — slipped through unstripped
|
||||||
|
# (e.g. בג"ץ 1764/05), risking that the extractor reads Nevo's answer key.
|
||||||
_DECISION_START = re.compile(
|
_DECISION_START = re.compile(
|
||||||
r"^(בפנינו|לפנינו|הערר שבנדון|ועדת הערר לתכנון|רקע עובדתי|עסקינן)",
|
r"^(בפנינו|לפנינו|לפניי|הערר שבנדון|ועדת הערר לתכנון|רקע עובדתי|עסקינן|"
|
||||||
|
r"פסק[- ]דין|פסק[- ]דינו|כב(?:וד)?['׳]?\s*השופט|המשנה לנשיא|הנשיא|השופט)",
|
||||||
re.MULTILINE,
|
re.MULTILINE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -369,7 +377,9 @@ def strip_nevo_preamble(text: str) -> str:
|
|||||||
|
|
||||||
Returns the original text unchanged if no preamble is detected.
|
Returns the original text unchanged if no preamble is detected.
|
||||||
"""
|
"""
|
||||||
head = text[:400]
|
# Window wide enough to catch the Nevo markers even when a long court/parties
|
||||||
|
# header precedes them (court rulings push חקיקה שאוזכרה:/מיני-רציו: down).
|
||||||
|
head = text[:1500]
|
||||||
if not any(marker in head for marker in _NEVO_MARKERS):
|
if not any(marker in head for marker in _NEVO_MARKERS):
|
||||||
return text
|
return text
|
||||||
m = _DECISION_START.search(text)
|
m = _DECISION_START.search(text)
|
||||||
|
|||||||
57
mcp-server/tests/test_nevo_preamble.py
Normal file
57
mcp-server/tests/test_nevo_preamble.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from legal_mcp.services import extractor as ex
|
||||||
|
|
||||||
|
# Nevo preamble block shared by the Nevo-sourced cases.
|
||||||
|
_PREAMBLE = (
|
||||||
|
"חקיקה שאוזכרה:\n"
|
||||||
|
"חוק התכנון והבניה, תשכ\"ה-1965: סע' 197\n\n"
|
||||||
|
"מיני-רציו:\n"
|
||||||
|
"* העותרים לא הוכיחו טעם מיוחד.\n"
|
||||||
|
"ביהמ\"ש העליון דחה את העתירה בקובעו:\n"
|
||||||
|
"המחוקק הגביל את הזמן ל-3 שנים.\n\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_court_ruling_judge_opening():
    """Court ruling that opens with the authoring judge's line (#86.1).

    Per the original test note, this shape was previously NOT stripped.
    """
    document = _PREAMBLE + "השופט ס' ג'ובראן:\n\nהאם קיימים טעמים מיוחדים..."
    stripped = ex.strip_nevo_preamble(document)
    # The result must begin at the judge line ...
    assert stripped.startswith("השופט ס' ג'ובראן:")
    # ... and nothing from the mini-ratio summary may survive.
    for leaked in ("מיני-רציו", "דחה את העתירה בקובעו"):
        assert leaked not in stripped
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_court_ruling_pdin_header():
    """Court ruling whose body opens with a judgment header line."""
    document = _PREAMBLE + "פסק-דין\n\nלפנינו עתירה..."
    stripped = ex.strip_nevo_preamble(document)
    # Output begins at the header and carries no mini-ratio text.
    assert stripped.startswith("פסק-דין")
    assert "מיני-רציו" not in stripped
|
||||||
|
|
||||||
|
|
||||||
|
def test_strips_vaada_opening_regression():
    """Regression guard: the pre-existing appeals-committee opening still strips."""
    document = _PREAMBLE + "בפנינו ערר על החלטת הוועדה המקומית..."
    stripped = ex.strip_nevo_preamble(document)
    # Output begins at the opening phrase and carries no mini-ratio text.
    assert stripped.startswith("בפנינו ערר")
    assert "מיני-רציו" not in stripped
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_nevo_unchanged():
    """Text without any Nevo marker comes back untouched, judge line or not."""
    document = "פסק דין\nהשופט כהן: בעניין שלפנינו..."
    result = ex.strip_nevo_preamble(document)
    assert result == document
|
||||||
|
|
||||||
|
|
||||||
|
def test_nevo_markers_but_no_body_start_unchanged():
    """A Nevo marker with no recognisable decision-body opening is left intact."""
    document = "מיני-רציו:\n* תקציר בלבד ללא גוף החלטה\n"
    result = ex.strip_nevo_preamble(document)
    assert result == document
|
||||||
|
|
||||||
|
|
||||||
|
def test_markers_past_400_chars_still_detected():
    """Nevo markers pushed beyond the old 400-char window are still found.

    A long court/parties header precedes the preamble, so the first marker
    sits past the previous detection window.
    """
    # 18 chars of court name + 400 chars of filler + newline = 419 chars.
    header = "בבית המשפט העליון " + ("x " * 200) + "\n"
    # Guard the premise: if the header ever shrinks below the old window,
    # this test would pass without exercising the wider window at all.
    assert len(header) > 400
    text = header + _PREAMBLE + "השופטת ע' ארבל:\n\nגוף ההחלטה..."
    out = ex.strip_nevo_preamble(text)
    assert out.startswith("השופטת ע' ארבל:")
    # Same leak check as the other stripping tests in this file.
    assert "מיני-רציו" not in out
|
||||||
Reference in New Issue
Block a user