feat(halacha): over-extraction consolidation — fold facets via claude_session (#81.5)
After a precedent finishes extracting, a claude_session pass folds facets of the SAME legal question (below #82's dedup cosine — the שפר 14-vs-4 / 403-17→89 granularity gap) into one canonical; the rest are marked 'rejected' (reversible: out of the active corpus AND the review queue, but recoverable). FOLD-ONLY — never merges distinct legal questions, never invents. - Engine: claude_session-as-judge (local CLI, zero cost), 'high' effort — folding needs careful judgment. One pass per precedent, runs in _extract_impl once all chunks are done (the prompt dedups within a chunk; this catches across chunks). - Pure, unit-tested helpers in halacha_quality: CONSOLIDATE_SYSTEM, build_consolidation_prompt, parse_fold_groups (fails SAFE → [] on any malformed shape; drops <2-member groups; coerces/dedups indices). - halacha_extractor._consolidate_precedent picks the canonical per group (approved>pending, higher confidence, quote_verified, longer) and rejects the rest via the existing update_halachot_batch (#84). Never rejects a canonical. Fails OPEN on any error (no CLI / parse fail → 0 folds, data untouched). - config: HALACHA_CONSOLIDATE_ENABLED/MODEL/EFFORT. Verified: suite 176 passed (10 new); integration vs dev DB — a 2-facet group folds to 1 canonical + 1 rejected (tagged), distinct rules untouched, claude error → 0 folds (fail-open). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -163,6 +163,15 @@ HALACHA_NLI_ENABLED = os.environ.get("HALACHA_NLI_ENABLED", "true").lower() == "
|
|||||||
HALACHA_NLI_MODEL = os.environ.get("HALACHA_NLI_MODEL", HALACHA_EXTRACT_MODEL)
|
HALACHA_NLI_MODEL = os.environ.get("HALACHA_NLI_MODEL", HALACHA_EXTRACT_MODEL)
|
||||||
HALACHA_NLI_EFFORT = os.environ.get("HALACHA_NLI_EFFORT", "low")
|
HALACHA_NLI_EFFORT = os.environ.get("HALACHA_NLI_EFFORT", "low")
|
||||||
|
|
||||||
|
# Halacha over-extraction consolidation (#81.5) — after a precedent finishes
|
||||||
|
# extracting, a claude_session pass folds facets of the SAME legal question
|
||||||
|
# (below the #82 dedup cosine) into one canonical; the rest are marked rejected
|
||||||
|
# (reversible). Cross-chunk safety net for over-splitting. Runs through the local
|
||||||
|
# CLI (zero cost); fails OPEN. 'high' effort — folding needs careful judgment.
|
||||||
|
HALACHA_CONSOLIDATE_ENABLED = os.environ.get("HALACHA_CONSOLIDATE_ENABLED", "true").lower() == "true"
|
||||||
|
HALACHA_CONSOLIDATE_MODEL = os.environ.get("HALACHA_CONSOLIDATE_MODEL", HALACHA_EXTRACT_MODEL)
|
||||||
|
HALACHA_CONSOLIDATE_EFFORT = os.environ.get("HALACHA_CONSOLIDATE_EFFORT", "high")
|
||||||
|
|
||||||
# Google Cloud Vision (OCR for scanned PDFs)
|
# Google Cloud Vision (OCR for scanned PDFs)
|
||||||
GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")
|
GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")
|
||||||
|
|
||||||
|
|||||||
@@ -305,6 +305,71 @@ async def _nli_check(items: list[dict]) -> list[str]:
|
|||||||
return halacha_quality.parse_nli_verdicts(raw, len(items))
|
return halacha_quality.parse_nli_verdicts(raw, len(items))
|
||||||
|
|
||||||
|
|
||||||
|
def _consolidation_priority(r: dict):
|
||||||
|
"""Canonical = the row to KEEP within a fold group (lower sorts first)."""
|
||||||
|
status_rank = {"approved": 0, "published": 0, "pending_review": 1}.get(
|
||||||
|
r.get("review_status"), 2)
|
||||||
|
return (
|
||||||
|
status_rank,
|
||||||
|
-float(r.get("confidence") or 0.0),
|
||||||
|
0 if r.get("quote_verified") else 1,
|
||||||
|
-len(r.get("rule_statement") or ""),
|
||||||
|
str(r["id"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _consolidate_precedent(case_law_id: UUID) -> int:
    """#81.5 — fold facets of the SAME legal question into one canonical row.

    Loads the precedent's non-rejected halachot, asks claude_session which
    indices belong together, keeps the best-ranked row of each group (see
    ``_consolidation_priority``) and marks the remaining members ``rejected``
    through ``db.update_halachot_batch``. FOLD-ONLY.

    Returns the value produced by ``db.update_halachot_batch`` (presumably the
    number of rows updated — confirm against the db layer), or 0 when the
    feature is disabled, there is nothing to fold, or anything fails.
    Fails OPEN: every exception is logged as a warning and yields 0.
    """
    if not config.HALACHA_CONSOLIDATE_ENABLED:
        return 0
    try:
        fetched = await db.list_halachot(case_law_id=case_law_id, limit=10_000)
        active = [row for row in fetched if row.get("review_status") != "rejected"]
        if len(active) < 2:
            return 0

        row_by_index = {row["halacha_index"]: row for row in active}
        verdict = await claude_session.query_json(
            halacha_quality.build_consolidation_prompt(active),
            system=halacha_quality.CONSOLIDATE_SYSTEM,
            model=config.HALACHA_CONSOLIDATE_MODEL or None,
            effort=config.HALACHA_CONSOLIDATE_EFFORT or None,
        )
        fold_groups = halacha_quality.parse_fold_groups(verdict)
        if not fold_groups:
            return 0

        keep_ids: set[str] = set()
        drop_ids: set[str] = set()
        for group in fold_groups:
            # Indices the judge returned that do not match an active row are ignored.
            members = [row_by_index[i] for i in group if i in row_by_index]
            if len(members) < 2:
                continue
            winner, *rest = sorted(members, key=_consolidation_priority)
            keep_ids.add(str(winner["id"]))
            drop_ids.update(str(row["id"]) for row in rest)

        # A row that won in one group is never rejected because it lost in another.
        to_reject = [row_id for row_id in drop_ids if row_id not in keep_ids]
        if not to_reject:
            return 0
        return await db.update_halachot_batch(
            to_reject, "rejected", reviewer="auto-consolidated (#81.5 facet-fold)",
        )
    except Exception as e:
        logger.warning(
            "halacha consolidation failed for %s (fail-open, no folds): %s",
            case_law_id, e,
        )
        return 0
|
||||||
|
|
||||||
|
|
||||||
async def _extract_chunk(
|
async def _extract_chunk(
|
||||||
chunk_text: str,
|
chunk_text: str,
|
||||||
section_type: str,
|
section_type: str,
|
||||||
@@ -585,7 +650,10 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
|
|||||||
return {"status": "partial", "extracted": total, "stored": stored_total,
|
return {"status": "partial", "extracted": total, "stored": stored_total,
|
||||||
"pending_chunks": still_pending, "total_chunks": len(chunks)}
|
"pending_chunks": still_pending, "total_chunks": len(chunks)}
|
||||||
|
|
||||||
# All chunks done.
|
# All chunks done. #81.5: fold cross-chunk facets of one legal question
|
||||||
|
# (the prompt dedups within a chunk; this catches across chunks).
|
||||||
|
folded = await _consolidate_precedent(case_law_id)
|
||||||
|
|
||||||
stored = total
|
stored = total
|
||||||
verified = sum(1 for h in await db.list_halachot(case_law_id=case_law_id, limit=10_000)
|
verified = sum(1 for h in await db.list_halachot(case_law_id=case_law_id, limit=10_000)
|
||||||
if h.get("quote_verified"))
|
if h.get("quote_verified"))
|
||||||
@@ -593,13 +661,14 @@ async def _extract_impl(case_law_id: UUID, force: bool = False,
|
|||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"halacha_extractor: case_law=%s completed — %d halachot stored "
|
"halacha_extractor: case_law=%s completed — %d halachot stored "
|
||||||
"(%d new this run), %d quote-verified, %d chunks",
|
"(%d new this run), %d quote-verified, %d folded, %d chunks",
|
||||||
case_law_id, total, stored_total, verified, len(chunks),
|
case_law_id, total, stored_total, verified, folded, len(chunks),
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"extracted": total,
|
"extracted": total,
|
||||||
"verified": verified,
|
"verified": verified,
|
||||||
|
"folded": folded,
|
||||||
"stored": stored,
|
"stored": stored,
|
||||||
"stored_this_run": stored_total,
|
"stored_this_run": stored_total,
|
||||||
"total_chunks": len(chunks),
|
"total_chunks": len(chunks),
|
||||||
|
|||||||
@@ -185,6 +185,66 @@ def parse_nli_verdicts(raw, n: int) -> list[str]:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# ── Over-extraction consolidation (fold facets of one legal question) — #81.5 ──
|
||||||
|
#
|
||||||
|
# #82 dedup-on-insert removes near-EXACT dups (cosine ≥ 0.93). #81.5 handles the
|
||||||
|
# remaining over-extraction: facets of the SAME legal question, phrased
|
||||||
|
# differently, that sit BELOW the dedup threshold (the שפר 14-vs-4 / 403-17→89
|
||||||
|
# granularity gap). A per-precedent claude_session pass groups such facets; the
|
||||||
|
# extractor keeps one canonical per group and marks the rest rejected (reversible,
|
||||||
|
# out of the active corpus + review queue). FOLD-ONLY — never merges distinct
|
||||||
|
# legal questions, never invents. Fails OPEN (parse error → no folds).
|
||||||
|
|
||||||
|
CONSOLIDATE_SYSTEM = (
|
||||||
|
"אתה מאחד פנים-כפולים של הלכות שחולצו מאותו פסק דין. בהינתן רשימה ממוספרת של הלכות, "
|
||||||
|
"זהה קבוצות של הלכות שהן **אותה שאלה משפטית** בניסוחים או פנים שונים. "
|
||||||
|
"כללים: (1) אַחֵד רק הלכות שעונות על אותה שאלה משפטית בדיוק; (2) **אל תאַחֵד** הלכות "
|
||||||
|
"שעונות על שאלות משפטיות שונות (גם אם קרובות בנושא); (3) הלכה ייחודית — אל תכלול בשום קבוצה. "
|
||||||
|
'החזר JSON array של קבוצות, כל קבוצה = array של מספרי-האינדקס שיש לאַחֵד (לפחות 2 חברים). '
|
||||||
|
"לדוגמה: [[2,5,9],[14,18]]. אם אין מה לאַחֵד החזר []. ללא markdown, ללא הסבר."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def build_consolidation_prompt(items: list[dict]) -> str:
    """Render a precedent's halachot as one ``[index] rule`` line per item.

    Each line carries the item's ``halacha_index`` and stripped
    ``rule_statement``; a non-empty stripped ``reasoning_summary`` is appended
    in parentheses. Lines are joined with newlines; an empty input gives "".
    """
    def render(entry: dict) -> str:
        index = entry.get("halacha_index")
        statement = (entry.get("rule_statement") or "").strip()
        rationale = (entry.get("reasoning_summary") or "").strip()
        suffix = f" (היגיון: {rationale})" if rationale else ""
        return f"[{index}] {statement}{suffix}"

    return "\n".join(render(entry) for entry in items)
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_fold_index(value):
    """Return ``value`` as an int index, or None when it is not a clean integer.

    Rejected: booleans (JSON true/false would silently become index 1/0),
    floats with a fractional part, NaN and infinities, and anything ``int()``
    cannot convert. Integral floats (``4.0``) and numeric strings (``"4"``)
    are accepted.
    """
    if isinstance(value, bool):
        return None
    # is_integer() is False for NaN and +/-inf as well as for fractional values,
    # so truncation can never turn 2.7 into index 2.
    if isinstance(value, float) and not value.is_integer():
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return None


def parse_fold_groups(raw) -> list[list[int]]:
    """Coerce judge output into a list of fold-groups (>=2 int indices each).

    Fails SAFE: a non-list ``raw`` yields [] (no folding) and this function
    does not raise on malformed members. Non-list groups are skipped. Inside
    a group, members that are not clean integers are dropped (see
    ``_coerce_fold_index``), duplicates are removed keeping first-seen order,
    and a group left with fewer than two members is discarded.
    """
    if not isinstance(raw, list):
        return []
    groups: list[list[int]] = []
    for candidate in raw:
        if not isinstance(candidate, list):
            continue
        members: list[int] = []
        seen: set[int] = set()
        for value in candidate:
            index = _coerce_fold_index(value)
            if index is None or index in seen:
                continue
            seen.add(index)
            members.append(index)
        if len(members) >= 2:
            groups.append(members)
    return groups
|
||||||
|
|
||||||
|
|
||||||
def compute_quality_flags(
|
def compute_quality_flags(
|
||||||
rule_statement: str,
|
rule_statement: str,
|
||||||
supporting_quote: str,
|
supporting_quote: str,
|
||||||
|
|||||||
@@ -146,3 +146,38 @@ def test_nli_check_empty():
|
|||||||
import asyncio
|
import asyncio
|
||||||
from legal_mcp.services import halacha_extractor as he
|
from legal_mcp.services import halacha_extractor as he
|
||||||
assert asyncio.run(he._nli_check([])) == []
|
assert asyncio.run(he._nli_check([])) == []
|
||||||
|
|
||||||
|
|
||||||
|
# ── #81.5 consolidation — pure prompt + fold-group parser ──
|
||||||
|
|
||||||
|
def test_build_consolidation_prompt():
    """The prompt lists every item as ``[index] rule`` and includes a non-empty reasoning."""
    with_reason = {"halacha_index": 3, "rule_statement": "כלל גימל", "reasoning_summary": "כי"}
    without_reason = {"halacha_index": 7, "rule_statement": "כלל זין", "reasoning_summary": ""}
    prompt = hq.build_consolidation_prompt([with_reason, without_reason])
    for fragment in ("[3] כלל גימל", "[7] כלל זין", "היגיון: כי"):
        assert fragment in prompt
|
||||||
|
|
||||||
|
|
||||||
|
# (judge output, expected fold groups) pairs for parse_fold_groups.
_FOLD_GROUP_CASES = [
    # well-formed groups pass through unchanged
    ([[2, 5, 9], [14, 18]], [[2, 5, 9], [14, 18]]),
    # singleton group dropped
    ([[2, 5], [7]], [[2, 5]]),
    # string ints coerced
    ([["2", "5"]], [[2, 5]]),
    # dedup within group
    ([[2, 2, 5]], [[2, 5]]),
    # nothing to fold
    ([], []),
    # non-list -> safe
    ("garbage", []),
    # None -> safe
    (None, []),
    # drop group that falls below 2 valid
    ([[1, "x"], [3, 4]], [[3, 4]]),
]


@pytest.mark.parametrize("raw,expected", _FOLD_GROUP_CASES)
def test_parse_fold_groups(raw, expected):
    """parse_fold_groups normalises judge output and never raises on bad shapes."""
    folded = hq.parse_fold_groups(raw)
    assert folded == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_consolidation_priority_prefers_approved_then_confidence():
    """An approved row outranks a pending row even when the pending one is more confident."""
    from legal_mcp.services import halacha_extractor as he

    shared = {"quote_verified": True, "rule_statement": "x"}
    approved = {"id": "a", "review_status": "approved", "confidence": 0.7, **shared}
    pending_hi = {"id": "b", "review_status": "pending_review", "confidence": 0.95, **shared}
    # Review status is the most significant key component, so "a" ranks first.
    ranked = sorted([approved, pending_hi], key=he._consolidation_priority)
    assert ranked[0]["id"] == "a"
|
||||||
|
|||||||
Reference in New Issue
Block a user