2026-06-11 15:52:53 +00:00
2 changed files with 193 additions and 20 deletions
--- a/mcp-server/src/legal_mcp/services/halacha_extractor.py
+++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py
@@ -64,6 +64,15 @@ EXTRACTION_FAILURE_THRESHOLD = 0.5
 # never contain holdings, only positions, so we skip them.
 EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
 # Sections confidently classified as NON-reasoning (parties' positions, the
 # factual background, the opening). The fallback path — taken when the chunker
 # labeled nothing as an extractable section (non-standard headings → 'other') —
 # excludes these so facts/arguments are NEVER fed into extraction, while
 # reasoning that merely landed under 'other' is still reached. Raises precision
 # on the dominant Facts↔Reasoning confusion class (#81.6; INV-LRN2
 # quality-at-source; LegalSeg / rhetorical-role labeling).
 NON_REASONING_SECTIONS = ("facts", "appellant_claims", "respondent_claims", "intro")
 # Two prompts — choose by source's is_binding flag.
 #
@@ -498,6 +507,39 @@ async def extract(case_law_id: UUID | str, force: bool = False,
        await pool.release(lock_conn)
 async def _select_extractable_chunks(
    case_law_id: UUID,
 ) -> tuple[list[dict], bool]:
    """Pick the chunks that are candidates for halacha extraction (#81.6).
    Rhetorical-role pre-filter (INV-LRN2 quality-at-source): only
    reasoning/decision sections feed extraction.
    Primary: chunks labeled as an extractable section
    (``EXTRACTABLE_SECTIONS``). Fallback — taken only when the chunker labeled
    *nothing* extractable (non-standard headings collapse everything to
    'other') — is every chunk EXCEPT those confidently classified as
    non-reasoning (``NON_REASONING_SECTIONS``: facts / parties' arguments /
    intro). This preserves recall for reasoning that landed under 'other' while
    never feeding the factual background or the parties' positions into
    extraction. Previously the fallback took *all* chunks, re-admitting exactly
    the sections the primary filter excludes.
    Returns ``(chunks, used_fallback)`` so the caller can log the fallback once.
    """
    chunks = await db.list_precedent_chunks(
        case_law_id, section_types=EXTRACTABLE_SECTIONS,
    )
    if chunks:
        return chunks, False
    all_chunks = await db.list_precedent_chunks(case_law_id)
    filtered = [
        c for c in all_chunks
        if c.get("section_type") not in NON_REASONING_SECTIONS
    ]
    return filtered, True
 async def _extract_impl(case_law_id: UUID, force: bool = False,
                        effort: str | None = None) -> dict:
    """Core extraction (caller holds the global advisory lock for the duration).
@@ -514,20 +556,14 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
    is_binding = bool(record.get("is_binding"))
-    # Try the targeted sections first (legal_analysis / ruling / conclusion).
+    # Rhetorical-role pre-filter (#81.6, INV-LRN2): only reasoning/decision
-    # If the chunker labeled everything as 'other' (common when a ruling
+    # sections are candidates. The fallback (no targeted section labeled)
-    # uses non-standard headings or the section markers aren't bracketed
+    # still excludes facts/arguments/intro — see _select_extractable_chunks.
-    # cleanly), fall back to ALL chunks — better to over-include than to
+    chunks, used_fallback = await _select_extractable_chunks(case_law_id)
-    # silently skip a ruling that has reasoning under an unexpected label.
+    if used_fallback and chunks:
    chunks = await db.list_precedent_chunks(
        case_law_id, section_types=EXTRACTABLE_SECTIONS,
    )
    if not chunks:
        chunks = await db.list_precedent_chunks(case_law_id)
        if chunks:
        logger.info(
            "halacha_extractor: case_law=%s — no targeted sections, "
-                "falling back to all %d chunks",
+            "falling back to %d non-argument chunks (facts/arguments excluded)",
            case_law_id, len(chunks),
        )
    if not chunks:
@@ -655,10 +691,10 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
    await asyncio.gather(*[_process(c) for c in pending])
-    # Decide final status from what's LEFT (re-read checkpoints).
+    # Decide final status from what's LEFT (re-read checkpoints). Use the same
-    after = await db.list_precedent_chunks(case_law_id, section_types=EXTRACTABLE_SECTIONS)
+    # candidate-selection policy as above so the pending count matches the set
-    if not after:
+    # we actually extracted from (G2 — single source of truth, no parallel path).
-        after = await db.list_precedent_chunks(case_law_id)
+    after, _ = await _select_extractable_chunks(case_law_id)
    still_pending = sum(1 for c in after if c.get("halacha_extracted_at") is None)
    total = len(await db.list_halachot(case_law_id=case_law_id, limit=10_000))
--- a/mcp-server/tests/test_halacha_rhetorical_prefilter.py
+++ b/mcp-server/tests/test_halacha_rhetorical_prefilter.py
@@ -0,0 +1,137 @@
 """Tests for TaskMaster #81.6 — rhetorical-role pre-filter on halacha extraction.
 Only reasoning/decision sections should feed halacha extraction (INV-LRN2
 quality-at-source). The historical bug: when the chunker labeled *nothing* as an
 extractable section (non-standard headings → everything 'other'), the fallback
 took ALL chunks — re-admitting the factual background and the parties'
 arguments, exactly the sections the primary filter excludes. The dominant
 extraction error class is Facts↔Reasoning confusion (LegalSeg), so feeding facts
 into extraction directly lowers precision.
 Fix: ``_select_extractable_chunks`` — the fallback now excludes
 ``NON_REASONING_SECTIONS`` (facts / appellant_claims / respondent_claims /
 intro) while still reaching reasoning that merely landed under 'other'.
 Runs fully OFFLINE — monkeypatches ``db.list_precedent_chunks`` so no Postgres
 is needed (same style as ``test_halacha_reextract_preserves_approved.py``).
 """
 from __future__ import annotations
 import asyncio
 from uuid import uuid4
 import pytest
 from legal_mcp.services import db, halacha_extractor
 def _chunk(idx: int, section_type: str) -> dict:
    return {
        "id": uuid4(),
        "chunk_index": idx,
        "content": f"chunk-{idx}-{section_type}",
        "section_type": section_type,
        "page_number": None,
        "halacha_extracted_at": None,
    }
 def _patch_chunks(monkeypatch: pytest.MonkeyPatch, all_chunks: list[dict]) -> list[dict]:
    """Patch db.list_precedent_chunks to filter ``all_chunks`` like Postgres would.
    Returns a list that records every call's ``section_types`` argument so a
    test can assert whether the unfiltered fallback query was issued.
    """
    calls: list = []
    async def _fake(case_law_id, section_types=None):  # noqa: ANN001
        calls.append(section_types)
        if section_types:
            return [c for c in all_chunks if c["section_type"] in section_types]
        return list(all_chunks)
    monkeypatch.setattr(db, "list_precedent_chunks", _fake)
    return calls
 def _run(coro):
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()
 def test_primary_path_returns_only_reasoning_sections(monkeypatch: pytest.MonkeyPatch) -> None:
    """When extractable sections exist, return exactly them — no fallback."""
    all_chunks = [
        _chunk(0, "facts"),
        _chunk(1, "legal_analysis"),
        _chunk(2, "appellant_claims"),
        _chunk(3, "ruling"),
        _chunk(4, "conclusion"),
    ]
    calls = _patch_chunks(monkeypatch, all_chunks)
    chunks, used_fallback = _run(
        halacha_extractor._select_extractable_chunks(uuid4()),
    )
    assert used_fallback is False
    got = sorted(c["section_type"] for c in chunks)
    assert got == ["conclusion", "legal_analysis", "ruling"]
    # Only the filtered query ran — the unfiltered fallback was never issued.
    assert calls == [halacha_extractor.EXTRACTABLE_SECTIONS]
 def test_fallback_excludes_facts_and_arguments(monkeypatch: pytest.MonkeyPatch) -> None:
    """No targeted section → fall back, but never to facts/arguments/intro."""
    all_chunks = [
        _chunk(0, "intro"),
        _chunk(1, "facts"),
        _chunk(2, "appellant_claims"),
        _chunk(3, "respondent_claims"),
        _chunk(4, "other"),   # reasoning that landed under an unexpected label
        _chunk(5, "other"),
    ]
    calls = _patch_chunks(monkeypatch, all_chunks)
    chunks, used_fallback = _run(
        halacha_extractor._select_extractable_chunks(uuid4()),
    )
    assert used_fallback is True
    # Only the 'other' chunks survive — facts / arguments / intro are dropped.
    assert {c["section_type"] for c in chunks} == {"other"}
    assert len(chunks) == 2
    # Both queries ran: the filtered primary (empty), then the unfiltered fallback.
    assert calls == [halacha_extractor.EXTRACTABLE_SECTIONS, None]
 def test_fallback_all_nonreasoning_extracts_nothing(monkeypatch: pytest.MonkeyPatch) -> None:
    """A doc that is entirely facts/arguments/intro yields zero candidates —
    extraction never runs on the factual background."""
    all_chunks = [
        _chunk(0, "intro"),
        _chunk(1, "facts"),
        _chunk(2, "appellant_claims"),
        _chunk(3, "respondent_claims"),
    ]
    _patch_chunks(monkeypatch, all_chunks)
    chunks, used_fallback = _run(
        halacha_extractor._select_extractable_chunks(uuid4()),
    )
    assert used_fallback is True
    assert chunks == []
 def test_non_reasoning_set_is_disjoint_from_extractable() -> None:
    """The two policy sets must never overlap — a section cannot be both a
    reasoning candidate and a confidently-excluded one."""
    assert not (
        set(halacha_extractor.NON_REASONING_SECTIONS)
        & set(halacha_extractor.EXTRACTABLE_SECTIONS)
    )