Merge pull request 'feat(chunker): דפוסי טענות לפסיקת בית-המשפט — parties_claims' (#297) from worktree-chunker-court-patterns into main
This commit was merged in pull request #297.
This commit is contained in:
@@ -22,18 +22,23 @@ from legal_mcp import config
|
|||||||
# court rulings use slightly different vocabulary (פסק דין, נימוקים, סוף דבר).
|
# court rulings use slightly different vocabulary (פסק דין, נימוקים, סוף דבר).
|
||||||
SECTION_PATTERNS = [
|
SECTION_PATTERNS = [
|
||||||
(r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע", "facts"),
|
(r"רקע\s*עובדתי|רקע\s*כללי|העובדות|הרקע", "facts"),
|
||||||
# appellant_claims: covers singular (עורר/עוררת, מערער/מערערת) and plural
|
# parties_claims: bilateral section common in Supreme Court / administrative
|
||||||
# (עוררים/עוררין, מערערים). Previously only plural was matched, so headers
|
# court decisions ("טענות הצדדים", "טיעוני הצדדים", "עמדות הצדדים"). Not split by side.
|
||||||
# like "טענות העורר:" were silently absorbed into the preceding section.
|
|
||||||
(
|
(
|
||||||
r"(?:טענות|עיקר\s*טענות)\s*ה(?:עוררי[םן]|עורר[ת]?|מערערי[םן]|מערער[ת]?)",
|
r"(?:טענות|טיעוני|עמדות)\s*הצדדים",
|
||||||
|
"parties_claims",
|
||||||
|
),
|
||||||
|
# appellant_claims: covers singular (עורר/עוררת, מערער/מערערת) and plural
|
||||||
|
# (עוררים/עוררין, מערערים), plus the court-format noun "טיעוני".
|
||||||
|
(
|
||||||
|
r"(?:טענות|עיקר\s*טענות|טיעוני)\s*ה(?:עוררי[םן]|עורר[ת]?|מערערי[םן]|מערער[ת]?)",
|
||||||
"appellant_claims",
|
"appellant_claims",
|
||||||
),
|
),
|
||||||
# respondent_claims: covers singular (משיב/משיבה) and plural (משיבים/משיבין),
|
# respondent_claims: covers singular (משיב/משיבה) and plural (משיבים/משיבין),
|
||||||
# plus verb forms תשובת/תגובת. "טענות המשיבה:" (feminine singular) was the
|
# plus construct-noun forms תשובת/תגובת/טיעוני. "טענות המשיבה:" (feminine singular) was
|
||||||
# root cause of halacha 8181-21 index-11 being extracted from party claims.
|
# the root cause of halacha 8181-21 index-11 being extracted from party claims.
|
||||||
(
|
(
|
||||||
r"(?:טענות|תשובת|תגובת|עיקר\s*טענות)\s*ה(?:משיבי[םן]|משיב[ה]?)",
|
r"(?:טענות|תשובת|תגובת|עיקר\s*טענות|טיעוני)\s*ה(?:משיבי[םן]|משיב[ה]?)",
|
||||||
"respondent_claims",
|
"respondent_claims",
|
||||||
),
|
),
|
||||||
(r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|נימוקים", "legal_analysis"),
|
(r"דיון\s*והכרעה|דיון|הכרעה|ניתוח\s*משפטי|המסגרת\s*המשפטית|נימוקים", "legal_analysis"),
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
|
|||||||
# reasoning that merely landed under 'other' is still reached. Raises precision
|
# reasoning that merely landed under 'other' is still reached. Raises precision
|
||||||
# on the dominant Facts↔Reasoning confusion class (#81.6; INV-LRN2
|
# on the dominant Facts↔Reasoning confusion class (#81.6; INV-LRN2
|
||||||
# quality-at-source; LegalSeg / rhetorical-role labeling).
|
# quality-at-source; LegalSeg / rhetorical-role labeling).
|
||||||
NON_REASONING_SECTIONS = ("facts", "appellant_claims", "respondent_claims", "intro")
|
NON_REASONING_SECTIONS = ("facts", "appellant_claims", "respondent_claims", "parties_claims", "intro")
|
||||||
|
|
||||||
|
|
||||||
# Two prompts — choose by source's is_binding flag.
|
# Two prompts — choose by source's is_binding flag.
|
||||||
|
|||||||
Reference in New Issue
Block a user