feat(chunker): דפוסי טענות לפסיקת בית-המשפט — parties_claims #297

Merged
chaim merged 1 commit from worktree-chunker-court-patterns into main 2026-06-17 17:21:05 +00:00
2 changed files with 13 additions and 8 deletions
Showing only changes of commit 9618dc895b - Show all commits

View File

@@ -22,18 +22,23 @@ from legal_mcp import config
# court rulings use slightly different vocabulary (פסק דין, נימוקים, סוף דבר).
SECTION_PATTERNS = [
(r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע", "facts"),
# appellant_claims: covers singular (עורר/עוררת, מערער/מערערת) and plural
# (עוררים/עוררין, מערערים). Previously only plural was matched, so headers
# like "טענות העורר:" were silently absorbed into the preceding section.
# parties_claims: bilateral section common in Supreme Court / administrative
# court decisions ("טענות הצדדים", "טיעוני הצדדים"). Not split by side.
(
r"(?:טענות|עיקר\s*טענות)\s*ה(?:עוררי[םן]|עורר[ת]?|מערערי[םן]|מערער[ת]?)",
r"(?:טענות|טיעוני|עמדות)\s*הצדדים",
"parties_claims",
),
# appellant_claims: covers singular (עורר/עוררת, מערער/מערערת) and plural
# (עוררים/עוררין, מערערים), plus the court-format construct-state noun "טיעוני".
(
r"(?:טענות|עיקר\s*טענות|טיעוני)\s*ה(?:עוררי[םן]|עורר[ת]?|מערערי[םן]|מערער[ת]?)",
"appellant_claims",
),
# respondent_claims: covers singular (משיב/משיבה) and plural (משיבים/משיבין),
# plus verb forms תשובת/תגובת. "טענות המשיבה:" (feminine singular) was the
# root cause of halacha 8181-21 index-11 being extracted from party claims.
# plus the construct-state nouns תשובת/תגובת/טיעוני. "טענות המשיבה:" (feminine singular) was
# the root cause of halacha 8181-21 index-11 being extracted from party claims.
(
r"(?:טענות|תשובת|תגובת|עיקר\s*טענות)\s*ה(?:משיבי[םן]|משיב[ה]?)",
r"(?:טענות|תשובת|תגובת|עיקר\s*טענות|טיעוני)\s*ה(?:משיבי[םן]|משיב[ה]?)",
"respondent_claims",
),
(r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|נימוקים", "legal_analysis"),

View File

@@ -101,7 +101,7 @@ EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
# reasoning that merely landed under 'other' is still reached. Raises precision
# on the dominant Facts↔Reasoning confusion class (#81.6; INV-LRN2
# quality-at-source; LegalSeg / rhetorical-role labeling).
NON_REASONING_SECTIONS = ("facts", "appellant_claims", "respondent_claims", "intro")
NON_REASONING_SECTIONS = ("facts", "appellant_claims", "respondent_claims", "parties_claims", "intro")
# Two prompts — choose by source's is_binding flag.