feat(halacha): rhetorical-role pre-filter — fallback excludes facts/arguments (#81.6)
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 5s
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 5s
חילוץ-הלכות מוגבל למקטעי הנמקה/הכרעה בלבד (INV-LRN2 quality-at-source).

הפער שנסגר: מסלול ה-fallback (כשה-chunker לא תייג שום מקטע כ-extractable, כותרות לא-תקניות → הכול 'other') נפל קודם ל**כל** ה-chunks — והחזיר בדיוק את המקטעים שהמסנן הראשי מחריג (רקע עובדתי + טענות הצדדים). בלבול Facts↔Reasoning הוא מחלקת-השגיאה הדומיננטית (LegalSeg), כך שהזנת עובדות לחילוץ פוגעת ישירות ב-precision.

- NON_REASONING_SECTIONS = (facts, appellant_claims, respondent_claims, intro)
- _select_extractable_chunks(): מרכז את מדיניות-הבחירה (primary + fallback) בפונקציה אחת המשמשת גם את הבחירה הראשית וגם את ה-re-read לקביעת-סטטוס (G2 — מקור-אמת יחיד, אין מסלול מקביל). ה-fallback מחריג את NON_REASONING_SECTIONS ועדיין מגיע להנמקה שנחתה תחת 'other'.

invariants: G1 (נרמול-במקור, לא תיקון-בקריאה) · G2 (אין מסלול מקביל) · INV-LRN2 (quality-at-source).

tests: 4 חדשות (primary/fallback-excludes-args/all-nonreasoning/disjoint-sets) + 61 בדיקות-הלכה קיימות עוברות.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
137
mcp-server/tests/test_halacha_rhetorical_prefilter.py
Normal file
137
mcp-server/tests/test_halacha_rhetorical_prefilter.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Tests for TaskMaster #81.6 — rhetorical-role pre-filter on halacha extraction.

Only reasoning/decision sections should feed halacha extraction (INV-LRN2
quality-at-source). The historical bug: when the chunker labeled *nothing* as an
extractable section (non-standard headings → everything 'other'), the fallback
took ALL chunks — re-admitting the factual background and the parties'
arguments, exactly the sections the primary filter excludes. The dominant
extraction error class is Facts↔Reasoning confusion (LegalSeg), so feeding facts
into extraction directly lowers precision.

Fix: ``_select_extractable_chunks`` — the fallback now excludes
``NON_REASONING_SECTIONS`` (facts / appellant_claims / respondent_claims /
intro) while still reaching reasoning that merely landed under 'other'.

Runs fully OFFLINE — monkeypatches ``db.list_precedent_chunks`` so no Postgres
is needed (same style as ``test_halacha_reextract_preserves_approved.py``).
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from legal_mcp.services import db, halacha_extractor
|
||||
|
||||
|
||||
def _chunk(idx: int, section_type: str) -> dict:
    """Build a minimal precedent-chunk row for the fake DB layer.

    Args:
        idx: Stored under ``chunk_index`` and embedded in ``content``.
        section_type: Stored under ``section_type`` and embedded in
            ``content``.

    Returns:
        A dict with a fresh random ``id``, the given index and section
        label, a synthetic ``content`` string, and ``page_number`` /
        ``halacha_extracted_at`` both set to ``None``.
    """
    return {
        "id": uuid4(),
        "chunk_index": idx,
        "content": f"chunk-{idx}-{section_type}",
        "section_type": section_type,
        "page_number": None,
        "halacha_extracted_at": None,
    }
|
||||
|
||||
|
||||
def _patch_chunks(
    monkeypatch: pytest.MonkeyPatch, all_chunks: list[dict]
) -> list[object]:
    """Replace ``db.list_precedent_chunks`` with an in-memory fake.

    The fake filters ``all_chunks`` by ``section_type`` when a non-empty
    ``section_types`` argument is given, and returns a copy of every chunk
    otherwise (``None`` or any other falsy value).

    Args:
        monkeypatch: pytest fixture used to install the fake on ``db``.
        all_chunks: The full set of chunk rows the fake serves from.

    Returns:
        A list that records every call's ``section_types`` argument, in call
        order, so a test can assert whether the unfiltered fallback query was
        issued. (These are the recorded arguments, not chunk rows.)
    """
    calls: list[object] = []

    async def _fake(case_law_id, section_types=None):  # noqa: ANN001
        # Record the filter argument before answering, so the call log is
        # complete even if the caller discards the result.
        calls.append(section_types)
        if section_types:
            return [c for c in all_chunks if c["section_type"] in section_types]
        return list(all_chunks)

    monkeypatch.setattr(db, "list_precedent_chunks", _fake)
    return calls
|
||||
|
||||
|
||||
def _run(coro):
    """Run ``coro`` to completion on a private event loop and return its result.

    A fresh loop is created for each call and always closed afterwards, even
    when the coroutine raises. The loop is never installed as the thread's
    current event loop, so this helper leaves global asyncio state untouched.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()
|
||||
|
||||
|
||||
def test_primary_path_returns_only_reasoning_sections(monkeypatch: pytest.MonkeyPatch) -> None:
    """When extractable sections exist, return exactly them — no fallback."""
    all_chunks = [
        _chunk(0, "facts"),
        _chunk(1, "legal_analysis"),
        _chunk(2, "appellant_claims"),
        _chunk(3, "ruling"),
        _chunk(4, "conclusion"),
    ]
    calls = _patch_chunks(monkeypatch, all_chunks)

    chunks, used_fallback = _run(
        halacha_extractor._select_extractable_chunks(uuid4()),
    )

    assert used_fallback is False
    got = sorted(c["section_type"] for c in chunks)
    assert got == ["conclusion", "legal_analysis", "ruling"]
    # Only the filtered query ran — the unfiltered fallback was never issued.
    assert calls == [halacha_extractor.EXTRACTABLE_SECTIONS]
|
||||
|
||||
|
||||
def test_fallback_excludes_facts_and_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """No targeted section → fall back, but never to facts/arguments/intro."""
    all_chunks = [
        _chunk(0, "intro"),
        _chunk(1, "facts"),
        _chunk(2, "appellant_claims"),
        _chunk(3, "respondent_claims"),
        _chunk(4, "other"),  # reasoning that landed under an unexpected label
        _chunk(5, "other"),
    ]
    calls = _patch_chunks(monkeypatch, all_chunks)

    chunks, used_fallback = _run(
        halacha_extractor._select_extractable_chunks(uuid4()),
    )

    assert used_fallback is True
    # Only the 'other' chunks survive — facts / arguments / intro are dropped.
    assert {c["section_type"] for c in chunks} == {"other"}
    assert len(chunks) == 2
    # Both queries ran: the filtered primary (empty), then the unfiltered fallback.
    assert calls == [halacha_extractor.EXTRACTABLE_SECTIONS, None]
|
||||
|
||||
|
||||
def test_fallback_all_nonreasoning_extracts_nothing(monkeypatch: pytest.MonkeyPatch) -> None:
    """A doc that is entirely facts/arguments/intro yields zero candidates.

    Extraction never runs on the factual background.
    """
    all_chunks = [
        _chunk(0, "intro"),
        _chunk(1, "facts"),
        _chunk(2, "appellant_claims"),
        _chunk(3, "respondent_claims"),
    ]
    _patch_chunks(monkeypatch, all_chunks)

    chunks, used_fallback = _run(
        halacha_extractor._select_extractable_chunks(uuid4()),
    )

    assert used_fallback is True
    assert chunks == []
|
||||
|
||||
|
||||
def test_non_reasoning_set_is_disjoint_from_extractable() -> None:
    """The two policy sets must never overlap.

    A section cannot be both a reasoning candidate and a
    confidently-excluded one.
    """
    assert set(halacha_extractor.NON_REASONING_SECTIONS).isdisjoint(
        halacha_extractor.EXTRACTABLE_SECTIONS
    )
|
||||
Reference in New Issue
Block a user