# Rank of each review status when choosing which row of a fold group to keep
# (lower = kept first). Any status not listed here ranks 2.
_STATUS_RANK = {"approved": 0, "published": 0, "pending_review": 1}


def _consolidation_priority(r: dict) -> tuple:
    """Sort key that picks the canonical row of a fold group (lowest wins).

    Ordering, most significant first:
      1. review status (approved/published, then pending_review, then others),
      2. higher ``confidence`` (missing/falsy counts as 0.0),
      3. ``quote_verified`` truthy before falsy,
      4. longer ``rule_statement``,
      5. ``str(id)`` as a final deterministic tie-break.

    Raises ``KeyError`` if the row has no ``"id"`` key.
    """
    return (
        _STATUS_RANK.get(r.get("review_status"), 2),
        -float(r.get("confidence") or 0.0),
        0 if r.get("quote_verified") else 1,
        -len(r.get("rule_statement") or ""),
        str(r["id"]),
    )


def _select_fold_losers(by_idx: dict, groups: list[list[int]]) -> list[str]:
    """Return the sorted ids of rows to fold away, given the fold groups.

    ``by_idx`` maps ``halacha_index`` to its row; ``groups`` is a list of
    index lists. Indices missing from ``by_idx`` are ignored, and a group
    with fewer than two known members is skipped. Within each remaining
    group the row with the lowest ``_consolidation_priority`` is the
    canonical; the others are losers. A row that is the canonical of ANY
    group is never returned, even if it lost in another group.
    """
    canonicals: set[str] = set()
    losers: set[str] = set()
    for group in groups:
        members = [by_idx[i] for i in group if i in by_idx]
        if len(members) < 2:
            continue
        members.sort(key=_consolidation_priority)
        canonicals.add(str(members[0]["id"]))
        losers.update(str(m["id"]) for m in members[1:])
    # Sorted so the id list handed to the DB does not depend on set/hash order.
    return sorted(losers - canonicals)


async def _consolidate_precedent(case_law_id: UUID) -> int:
    """#81.5 — fold facets of the SAME legal question into one canonical.

    Loads the precedent's halachot whose ``review_status`` is not
    ``"rejected"``, asks ``claude_session.query_json`` to group them, keeps
    the best row of each group (see ``_consolidation_priority``) and marks
    the remaining rows ``"rejected"`` through ``db.update_halachot_batch``.

    Returns 0 when consolidation is disabled, when fewer than two rows are
    eligible, when no fold groups or no losers result, or when any exception
    is raised (fail-open: the error is logged and no row is updated by this
    function). Otherwise returns the value of ``db.update_halachot_batch``
    (presumably the number of rows updated — confirm against the db layer).

    NOTE(review): a row with status approved/published is still marked
    rejected when another row in its group outranks it (e.g. two approved
    rows in one group) — confirm that overriding reviewed rows is intended.
    """
    if not config.HALACHA_CONSOLIDATE_ENABLED:
        return 0
    try:
        rows = [
            r for r in await db.list_halachot(case_law_id=case_law_id, limit=10_000)
            if r.get("review_status") != "rejected"
        ]
        if len(rows) < 2:
            return 0
        # Built before the model call so a row without "halacha_index" aborts
        # the pass before the query is issued.
        by_idx = {r["halacha_index"]: r for r in rows}
        raw = await claude_session.query_json(
            halacha_quality.build_consolidation_prompt(rows),
            system=halacha_quality.CONSOLIDATE_SYSTEM,
            model=config.HALACHA_CONSOLIDATE_MODEL or None,
            effort=config.HALACHA_CONSOLIDATE_EFFORT or None,
        )
        groups = halacha_quality.parse_fold_groups(raw)
        if not groups:
            return 0
        loser_ids = _select_fold_losers(by_idx, groups)
        if not loser_ids:
            return 0
        return await db.update_halachot_batch(
            loser_ids, "rejected", reviewer="auto-consolidated (#81.5 facet-fold)",
        )
    except Exception as e:
        # Deliberate fail-open boundary: log (with traceback) and fold nothing.
        logger.warning(
            "halacha consolidation failed for %s (fail-open, no folds): %s",
            case_law_id, e, exc_info=True,
        )
        return 0
# #82 dedup-on-insert removes near-EXACT dups (cosine ≥ 0.93). #81.5 handles the
# remaining over-extraction: facets of the SAME legal question, phrased
# differently, that sit BELOW the dedup threshold (the שפר 14-vs-4 / 403-17→89
# granularity gap). A per-precedent claude_session pass groups such facets; the
# extractor keeps one canonical per group and marks the rest rejected (reversible,
# out of the active corpus + review queue). FOLD-ONLY — never merges distinct
# legal questions, never invents. Fails OPEN (parse error → no folds).

CONSOLIDATE_SYSTEM = (
    "אתה מאחד פנים-כפולים של הלכות שחולצו מאותו פסק דין. בהינתן רשימה ממוספרת של הלכות, "
    "זהה קבוצות של הלכות שהן **אותה שאלה משפטית** בניסוחים או פנים שונים. "
    "כללים: (1) אַחֵד רק הלכות שעונות על אותה שאלה משפטית בדיוק; (2) **אל תאַחֵד** הלכות "
    "שעונות על שאלות משפטיות שונות (גם אם קרובות בנושא); (3) הלכה ייחודית — אל תכלול בשום קבוצה. "
    'החזר JSON array של קבוצות, כל קבוצה = array של מספרי-האינדקס שיש לאַחֵד (לפחות 2 חברים). '
    "לדוגמה: [[2,5,9],[14,18]]. אם אין מה לאַחֵד החזר []. ללא markdown, ללא הסבר."
)


def build_consolidation_prompt(items: list[dict]) -> str:
    """Render a precedent's halachot as one ``[index] rule`` line per item.

    Each line is ``[<halacha_index>] <rule_statement>``; when the item has a
    non-empty ``reasoning_summary`` it is appended in parentheses. Rule and
    reasoning are stripped of surrounding whitespace; missing values render
    as an empty string. Lines are joined with newlines.
    """
    blocks = []
    for h in items:
        idx = h.get("halacha_index")
        rule = (h.get("rule_statement") or "").strip()
        reason = (h.get("reasoning_summary") or "").strip()
        line = f"[{idx}] {rule}"
        if reason:
            line += f" (היגיון: {reason})"
        blocks.append(line)
    return "\n".join(blocks)


def parse_fold_groups(raw) -> list[list[int]]:
    """Coerce judge output into a list of fold-groups (≥2 int indices each).

    Fails SAFE: a non-list ``raw`` yields ``[]`` and non-list groups are
    skipped. Inside a group only genuine indices are kept: ints, integer-valued
    floats (``2.0``) and strings ``int()`` accepts. Booleans, fractional or
    non-finite floats, and anything else are dropped rather than coerced, so a
    stray ``2.7`` or ``true`` can never select a row. Duplicates within a group
    are removed (first occurrence wins); groups left with fewer than two
    members are dropped. Never raises.
    """
    if not isinstance(raw, list):
        return []
    groups: list[list[int]] = []
    for g in raw:
        if not isinstance(g, list):
            continue
        members: list[int] = []
        for x in g:
            # bool is an int subclass; True/False are not indices.
            if isinstance(x, bool):
                continue
            # Reject 2.7, NaN and ±inf up front (is_integer() is False for all).
            if isinstance(x, float) and not x.is_integer():
                continue
            try:
                members.append(int(x))
            except (TypeError, ValueError, OverflowError):
                continue
        # dedup within group, preserve order
        members = list(dict.fromkeys(members))
        if len(members) >= 2:
            groups.append(members)
    return groups
"approved", "confidence": 0.7, + "quote_verified": True, "rule_statement": "x"} + pending_hi = {"id": "b", "review_status": "pending_review", "confidence": 0.95, + "quote_verified": True, "rule_statement": "x"} + # approved sorts before higher-confidence pending → kept as canonical + assert min([approved, pending_hi], key=he._consolidation_priority)["id"] == "a"