Merge pull request 'feat(halacha): סינון לפי תפקיד רטורי — fallback מחריג facts/טענות (#81.6)' (#187) from worktree-halacha-rhetorical-prefilter into main
This commit was merged in pull request #187.
This commit is contained in:
@@ -64,6 +64,15 @@ EXTRACTION_FAILURE_THRESHOLD = 0.5
|
|||||||
# never contain holdings, only positions, so we skip them.
|
# never contain holdings, only positions, so we skip them.
|
||||||
EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
|
EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
|
||||||
|
|
||||||
|
# Sections confidently classified as NON-reasoning (parties' positions, the
|
||||||
|
# factual background, the opening). The fallback path — taken when the chunker
|
||||||
|
# labeled nothing as an extractable section (non-standard headings → 'other') —
|
||||||
|
# excludes these so facts/arguments are NEVER fed into extraction, while
|
||||||
|
# reasoning that merely landed under 'other' is still reached. Raises precision
|
||||||
|
# on the dominant Facts↔Reasoning confusion class (#81.6; INV-LRN2
|
||||||
|
# quality-at-source; LegalSeg / rhetorical-role labeling).
|
||||||
|
NON_REASONING_SECTIONS = ("facts", "appellant_claims", "respondent_claims", "intro")
|
||||||
|
|
||||||
|
|
||||||
# Two prompts — choose by source's is_binding flag.
|
# Two prompts — choose by source's is_binding flag.
|
||||||
#
|
#
|
||||||
@@ -498,6 +507,39 @@ async def extract(case_law_id: UUID | str, force: bool = False,
|
|||||||
await pool.release(lock_conn)
|
await pool.release(lock_conn)
|
||||||
|
|
||||||
|
|
||||||
|
async def _select_extractable_chunks(
|
||||||
|
case_law_id: UUID,
|
||||||
|
) -> tuple[list[dict], bool]:
|
||||||
|
"""Pick the chunks that are candidates for halacha extraction (#81.6).
|
||||||
|
|
||||||
|
Rhetorical-role pre-filter (INV-LRN2 quality-at-source): only
|
||||||
|
reasoning/decision sections feed extraction.
|
||||||
|
|
||||||
|
Primary: chunks labeled as an extractable section
|
||||||
|
(``EXTRACTABLE_SECTIONS``). Fallback — taken only when the chunker labeled
|
||||||
|
*nothing* extractable (non-standard headings collapse everything to
|
||||||
|
'other') — is every chunk EXCEPT those confidently classified as
|
||||||
|
non-reasoning (``NON_REASONING_SECTIONS``: facts / parties' arguments /
|
||||||
|
intro). This preserves recall for reasoning that landed under 'other' while
|
||||||
|
never feeding the factual background or the parties' positions into
|
||||||
|
extraction. Previously the fallback took *all* chunks, re-admitting exactly
|
||||||
|
the sections the primary filter excludes.
|
||||||
|
|
||||||
|
Returns ``(chunks, used_fallback)`` so the caller can log the fallback once.
|
||||||
|
"""
|
||||||
|
chunks = await db.list_precedent_chunks(
|
||||||
|
case_law_id, section_types=EXTRACTABLE_SECTIONS,
|
||||||
|
)
|
||||||
|
if chunks:
|
||||||
|
return chunks, False
|
||||||
|
all_chunks = await db.list_precedent_chunks(case_law_id)
|
||||||
|
filtered = [
|
||||||
|
c for c in all_chunks
|
||||||
|
if c.get("section_type") not in NON_REASONING_SECTIONS
|
||||||
|
]
|
||||||
|
return filtered, True
|
||||||
|
|
||||||
|
|
||||||
async def _extract_impl(case_law_id: UUID, force: bool = False,
|
async def _extract_impl(case_law_id: UUID, force: bool = False,
|
||||||
effort: str | None = None) -> dict:
|
effort: str | None = None) -> dict:
|
||||||
"""Core extraction (caller holds the global advisory lock for the duration).
|
"""Core extraction (caller holds the global advisory lock for the duration).
|
||||||
@@ -514,22 +556,16 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
|
|||||||
|
|
||||||
is_binding = bool(record.get("is_binding"))
|
is_binding = bool(record.get("is_binding"))
|
||||||
|
|
||||||
# Try the targeted sections first (legal_analysis / ruling / conclusion).
|
# Rhetorical-role pre-filter (#81.6, INV-LRN2): only reasoning/decision
|
||||||
# If the chunker labeled everything as 'other' (common when a ruling
|
# sections are candidates. The fallback (no targeted section labeled)
|
||||||
# uses non-standard headings or the section markers aren't bracketed
|
# still excludes facts/arguments/intro — see _select_extractable_chunks.
|
||||||
# cleanly), fall back to ALL chunks — better to over-include than to
|
chunks, used_fallback = await _select_extractable_chunks(case_law_id)
|
||||||
# silently skip a ruling that has reasoning under an unexpected label.
|
if used_fallback and chunks:
|
||||||
chunks = await db.list_precedent_chunks(
|
logger.info(
|
||||||
case_law_id, section_types=EXTRACTABLE_SECTIONS,
|
"halacha_extractor: case_law=%s — no targeted sections, "
|
||||||
)
|
"falling back to %d non-argument chunks (facts/arguments excluded)",
|
||||||
if not chunks:
|
case_law_id, len(chunks),
|
||||||
chunks = await db.list_precedent_chunks(case_law_id)
|
)
|
||||||
if chunks:
|
|
||||||
logger.info(
|
|
||||||
"halacha_extractor: case_law=%s — no targeted sections, "
|
|
||||||
"falling back to all %d chunks",
|
|
||||||
case_law_id, len(chunks),
|
|
||||||
)
|
|
||||||
if not chunks:
|
if not chunks:
|
||||||
await db.set_case_law_halacha_status(case_law_id, "completed")
|
await db.set_case_law_halacha_status(case_law_id, "completed")
|
||||||
return {"status": "no_chunks", "extracted": 0, "stored": 0}
|
return {"status": "no_chunks", "extracted": 0, "stored": 0}
|
||||||
@@ -655,10 +691,10 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
|
|||||||
|
|
||||||
await asyncio.gather(*[_process(c) for c in pending])
|
await asyncio.gather(*[_process(c) for c in pending])
|
||||||
|
|
||||||
# Decide final status from what's LEFT (re-read checkpoints).
|
# Decide final status from what's LEFT (re-read checkpoints). Use the same
|
||||||
after = await db.list_precedent_chunks(case_law_id, section_types=EXTRACTABLE_SECTIONS)
|
# candidate-selection policy as above so the pending count matches the set
|
||||||
if not after:
|
# we actually extracted from (G2 — single source of truth, no parallel path).
|
||||||
after = await db.list_precedent_chunks(case_law_id)
|
after, _ = await _select_extractable_chunks(case_law_id)
|
||||||
still_pending = sum(1 for c in after if c.get("halacha_extracted_at") is None)
|
still_pending = sum(1 for c in after if c.get("halacha_extracted_at") is None)
|
||||||
total = len(await db.list_halachot(case_law_id=case_law_id, limit=10_000))
|
total = len(await db.list_halachot(case_law_id=case_law_id, limit=10_000))
|
||||||
|
|
||||||
|
|||||||
137
mcp-server/tests/test_halacha_rhetorical_prefilter.py
Normal file
137
mcp-server/tests/test_halacha_rhetorical_prefilter.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
"""Tests for TaskMaster #81.6 — rhetorical-role pre-filter on halacha extraction.
|
||||||
|
|
||||||
|
Only reasoning/decision sections should feed halacha extraction (INV-LRN2
|
||||||
|
quality-at-source). The historical bug: when the chunker labeled *nothing* as an
|
||||||
|
extractable section (non-standard headings → everything 'other'), the fallback
|
||||||
|
took ALL chunks — re-admitting the factual background and the parties'
|
||||||
|
arguments, exactly the sections the primary filter excludes. The dominant
|
||||||
|
extraction error class is Facts↔Reasoning confusion (LegalSeg), so feeding facts
|
||||||
|
into extraction directly lowers precision.
|
||||||
|
|
||||||
|
Fix: ``_select_extractable_chunks`` — the fallback now excludes
|
||||||
|
``NON_REASONING_SECTIONS`` (facts / appellant_claims / respondent_claims /
|
||||||
|
intro) while still reaching reasoning that merely landed under 'other'.
|
||||||
|
|
||||||
|
Runs fully OFFLINE — monkeypatches ``db.list_precedent_chunks`` so no Postgres
|
||||||
|
is needed (same style as ``test_halacha_reextract_preserves_approved.py``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from legal_mcp.services import db, halacha_extractor
|
||||||
|
|
||||||
|
|
||||||
|
def _chunk(idx: int, section_type: str) -> dict:
|
||||||
|
return {
|
||||||
|
"id": uuid4(),
|
||||||
|
"chunk_index": idx,
|
||||||
|
"content": f"chunk-{idx}-{section_type}",
|
||||||
|
"section_type": section_type,
|
||||||
|
"page_number": None,
|
||||||
|
"halacha_extracted_at": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_chunks(monkeypatch: pytest.MonkeyPatch, all_chunks: list[dict]) -> list[dict]:
|
||||||
|
"""Patch db.list_precedent_chunks to filter ``all_chunks`` like Postgres would.
|
||||||
|
|
||||||
|
Returns a list that records every call's ``section_types`` argument so a
|
||||||
|
test can assert whether the unfiltered fallback query was issued.
|
||||||
|
"""
|
||||||
|
calls: list = []
|
||||||
|
|
||||||
|
async def _fake(case_law_id, section_types=None): # noqa: ANN001
|
||||||
|
calls.append(section_types)
|
||||||
|
if section_types:
|
||||||
|
return [c for c in all_chunks if c["section_type"] in section_types]
|
||||||
|
return list(all_chunks)
|
||||||
|
|
||||||
|
monkeypatch.setattr(db, "list_precedent_chunks", _fake)
|
||||||
|
return calls
|
||||||
|
|
||||||
|
|
||||||
|
def _run(coro):
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
try:
|
||||||
|
return loop.run_until_complete(coro)
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_primary_path_returns_only_reasoning_sections(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
"""When extractable sections exist, return exactly them — no fallback."""
|
||||||
|
all_chunks = [
|
||||||
|
_chunk(0, "facts"),
|
||||||
|
_chunk(1, "legal_analysis"),
|
||||||
|
_chunk(2, "appellant_claims"),
|
||||||
|
_chunk(3, "ruling"),
|
||||||
|
_chunk(4, "conclusion"),
|
||||||
|
]
|
||||||
|
calls = _patch_chunks(monkeypatch, all_chunks)
|
||||||
|
|
||||||
|
chunks, used_fallback = _run(
|
||||||
|
halacha_extractor._select_extractable_chunks(uuid4()),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert used_fallback is False
|
||||||
|
got = sorted(c["section_type"] for c in chunks)
|
||||||
|
assert got == ["conclusion", "legal_analysis", "ruling"]
|
||||||
|
# Only the filtered query ran — the unfiltered fallback was never issued.
|
||||||
|
assert calls == [halacha_extractor.EXTRACTABLE_SECTIONS]
|
||||||
|
|
||||||
|
|
||||||
|
def test_fallback_excludes_facts_and_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
"""No targeted section → fall back, but never to facts/arguments/intro."""
|
||||||
|
all_chunks = [
|
||||||
|
_chunk(0, "intro"),
|
||||||
|
_chunk(1, "facts"),
|
||||||
|
_chunk(2, "appellant_claims"),
|
||||||
|
_chunk(3, "respondent_claims"),
|
||||||
|
_chunk(4, "other"), # reasoning that landed under an unexpected label
|
||||||
|
_chunk(5, "other"),
|
||||||
|
]
|
||||||
|
calls = _patch_chunks(monkeypatch, all_chunks)
|
||||||
|
|
||||||
|
chunks, used_fallback = _run(
|
||||||
|
halacha_extractor._select_extractable_chunks(uuid4()),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert used_fallback is True
|
||||||
|
# Only the 'other' chunks survive — facts / arguments / intro are dropped.
|
||||||
|
assert {c["section_type"] for c in chunks} == {"other"}
|
||||||
|
assert len(chunks) == 2
|
||||||
|
# Both queries ran: the filtered primary (empty), then the unfiltered fallback.
|
||||||
|
assert calls == [halacha_extractor.EXTRACTABLE_SECTIONS, None]
|
||||||
|
|
||||||
|
|
||||||
|
def test_fallback_all_nonreasoning_extracts_nothing(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
|
"""A doc that is entirely facts/arguments/intro yields zero candidates —
|
||||||
|
extraction never runs on the factual background."""
|
||||||
|
all_chunks = [
|
||||||
|
_chunk(0, "intro"),
|
||||||
|
_chunk(1, "facts"),
|
||||||
|
_chunk(2, "appellant_claims"),
|
||||||
|
_chunk(3, "respondent_claims"),
|
||||||
|
]
|
||||||
|
_patch_chunks(monkeypatch, all_chunks)
|
||||||
|
|
||||||
|
chunks, used_fallback = _run(
|
||||||
|
halacha_extractor._select_extractable_chunks(uuid4()),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert used_fallback is True
|
||||||
|
assert chunks == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_reasoning_set_is_disjoint_from_extractable() -> None:
|
||||||
|
"""The two policy sets must never overlap — a section cannot be both a
|
||||||
|
reasoning candidate and a confidently-excluded one."""
|
||||||
|
assert not (
|
||||||
|
set(halacha_extractor.NON_REASONING_SECTIONS)
|
||||||
|
& set(halacha_extractor.EXTRACTABLE_SECTIONS)
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user