From a571ad535bdde15b61034fc59cbda6606b5759cc Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 6 Jun 2026 20:54:31 +0000 Subject: [PATCH] =?UTF-8?q?fix(#88+#87):=20=D7=A1=D7=A0=D7=9B=D7=A8=D7=95?= =?UTF-8?q?=D7=9F=20DB=E2=86=94file=20=D7=90=D7=95=D7=98=D7=95=D7=9E=D7=98?= =?UTF-8?q?=D7=99=20+=20claims=5Fcoverage=20=D7=9E=D7=91=D7=97=D7=99=D7=9F?= =?UTF-8?q?=20=D7=9B=D7=AA=D7=91-=D7=A2=D7=A8=D7=A8=20=D7=9E=D7=AA=D7=9B?= =?UTF-8?q?=D7=AA=D7=95=D7=91=D7=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #88 (DB↔file, lessons #35): drafts/decision.md דרסה את עצמה רק ב-save_block_content; renumber_all_blocks + נתיבי store_block אחרים השאירו את הקובץ stale → QA נכשל פעמיים על אותה בעיה (CMPA-62). תיקון: _update_draft_file הפך ל-hook אוטומטי (מקבל decision_id, מאתר case פנימית) שנקרא מ-store_block (כל persist) ומ- renumber_all_blocks. legal-qa ממילא קורא מ-DB → שני הצדדים זהים תמיד. #87 (claims_coverage, 1033-25): טענות מתכתובת (claim_type='reply' — תגובה/ השלמת-טיעון) סומנו "לא נענו" כ-false-positive. תיקון: check_claims_coverage דורש מענה רק לטענות כתב-הערר (claim_type='claim', appellant); reply/תכתובת מוחרגות. בקבלה מלאה הסף מוקל (0.2→0.4) כי העורר זכה במלואו. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/legal-decision-lessons.md | 1 + .../src/legal_mcp/services/block_writer.py | 44 ++++++++++++------- .../src/legal_mcp/services/qa_validator.py | 35 +++++++++++---- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/docs/legal-decision-lessons.md b/docs/legal-decision-lessons.md index f2de85e..7ccb71d 100644 --- a/docs/legal-decision-lessons.md +++ b/docs/legal-decision-lessons.md @@ -463,6 +463,7 @@ The draft's biggest structural error was adding the "נבאר" doctrinal paragra - **Problem:** legal-writer updates `decision_blocks` in the DB, but legal-qa reads from `drafts/decision.md` on disk. In CMPA-62 the writer reported updating block headers in DB but the file did not re-sync, causing QA-2 to fail on exactly the same issue twice. - **Lesson:** Single source of truth is mandatory — either the writer must write to BOTH the DB and the decision.md file in one atomic step, or there must be an automatic `regenerate-draft` hook that runs after every block update so the file always reflects the latest DB state. Two unsynchronized sources will keep producing the same false-fail loop. - **Owner:** Infrastructure task — not a writer/QA prompt fix. +- **✅ RESOLVED (GAP-88, 2026-06-06):** `block_writer._update_draft_file` is now an automatic regenerate hook called from `store_block` (every persist) **and** `renumber_all_blocks` — so `drafts/decision.md` always reflects `decision_blocks`. legal-qa already validates against the DB; both sides are now identical. --- diff --git a/mcp-server/src/legal_mcp/services/block_writer.py b/mcp-server/src/legal_mcp/services/block_writer.py index 1faf203..6ad63fd 100644 --- a/mcp-server/src/legal_mcp/services/block_writer.py +++ b/mcp-server/src/legal_mcp/services/block_writer.py @@ -1088,37 +1088,39 @@ async def save_block_content(case_id: UUID, block_id: str, content: str) -> dict result["generation_type"] = "claude-code" result["model_used"] = "claude-code" - await store_block(UUID(decision["id"]), result) + await store_block(UUID(decision["id"]), result) # store_block syncs the file (#35) await db.mark_blocks_stale(case_id, False) - # Also write/update the draft file on disk - await _update_draft_file(case_id, UUID(decision["id"])) - return result -async def _update_draft_file(case_id: UUID, decision_id: UUID) -> None: - """Rebuild drafts/decision.md from all blocks in DB.""" - from pathlib import Path - - case = await db.get_case(case_id) - if not case: - return - - case_dir = config.find_case_dir(case["case_number"]) - draft_dir = case_dir / "drafts" - draft_dir.mkdir(parents=True, exist_ok=True) - +async def _update_draft_file(decision_id: UUID) -> None: + """Rebuild drafts/decision.md from all blocks in DB — the single + regenerate-draft hook (lessons #35 / GAP-88). Called after EVERY + decision_blocks mutation (store_block, renumber) so the on-disk file never + drifts from the DB. legal-qa validates against the DB; export and the chair + read the file — keeping them identical kills the "QA fails twice on the same + already-fixed issue" loop (CMPA-62). Resolves case from decision_id so no + caller has to thread case_id through.""" pool = await db.get_pool() async with pool.acquire() as conn: + case_row = await conn.fetchrow( + "SELECT c.case_number FROM decisions d JOIN cases c ON c.id = d.case_id " + "WHERE d.id = $1", + decision_id, + ) + if not case_row: + return rows = await conn.fetch( "SELECT content FROM decision_blocks WHERE decision_id = $1 AND content != '' ORDER BY block_index", decision_id, ) + draft_dir = config.find_case_dir(case_row["case_number"]) / "drafts" + draft_dir.mkdir(parents=True, exist_ok=True) draft_path = draft_dir / "decision.md" draft_path.write_text("\n\n".join(row["content"] for row in rows if row["content"]), encoding="utf-8") - logger.info("Draft file updated: %s (%d blocks)", draft_path, len(rows)) + logger.info("Draft file synced: %s (%d blocks)", draft_path, len(rows)) # ── Renumbering ─────────────────────────────────────────────────── @@ -1172,6 +1174,11 @@ async def renumber_all_blocks(decision_id: UUID) -> dict: ) updated += 1 + # #35 — renumber mutates content via raw UPDATE (bypasses store_block), so + # sync the draft file here too, otherwise the file keeps stale numbering. + if updated: + await _update_draft_file(decision_id) + return {"total_paragraphs": current_num - 1, "blocks_updated": updated} @@ -1204,6 +1211,9 @@ async def store_block(decision_id: UUID, block_result: dict) -> None: block_result["model_used"], block_result["temperature"], ) + # #35 — regenerate the on-disk draft on every persist so DB and file stay + # identical (legal-qa reads DB; export/chair read the file). + await _update_draft_file(decision_id) async def write_and_store_block( diff --git a/mcp-server/src/legal_mcp/services/qa_validator.py b/mcp-server/src/legal_mcp/services/qa_validator.py index 7008daf..f854912 100644 --- a/mcp-server/src/legal_mcp/services/qa_validator.py +++ b/mcp-server/src/legal_mcp/services/qa_validator.py @@ -104,7 +104,7 @@ CLAIMS_CHECK_PROMPT = """אתה בודק איכות החלטות משפטיות. """ -async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict: +async def check_claims_coverage(blocks: list[dict], claims: list[dict], outcome: str = "") -> dict: """בדיקה סמנטית (Claude) שכל טענה נענתה בדיון.""" yod = next((b for b in blocks if b["block_id"] == "block-yod"), None) if not yod or not yod.get("content"): @@ -114,16 +114,26 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict: if not claims: return {"name": "claims_coverage", "passed": True, "errors": [], "severity": "critical"} - # Filter: only APPELLANT claims from original pleadings. - # Committee/permit_applicant claims are defensive positions, not claims - # that need to be "addressed" in the discussion. + # #87/GAP-87 — only the appellant's claims from the APPEAL PLEADING itself + # must be addressed. claim_type: 'claim'=כתב ערר (mandatory), 'response'=כתב + # תשובה, 'reply'=תגובה/השלמת-טיעון/תכתובת (supplementary correspondence — NOT + # a standalone duty to answer, especially on full acceptance). Counting reply/ + # correspondence claims as "unanswered" produced false QA fails (1033-25). source_claims = [ c for c in claims if c.get("source_document", "") != "block-zayin" - and c.get("party_role") in ("appellant", "respondent") + and c.get("claim_type") == "claim" + and c.get("party_role") == "appellant" ] if not source_claims: - # Fallback: all non-block-zayin claims + # Fallback: appellant/respondent pleadings, excluding supplementary replies. + source_claims = [ + c for c in claims + if c.get("source_document", "") != "block-zayin" + and c.get("claim_type") != "reply" + and c.get("party_role") in ("appellant", "respondent") + ] + if not source_claims: source_claims = [c for c in claims if c.get("source_document", "") != "block-zayin"] if not source_claims: source_claims = claims @@ -165,9 +175,14 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict: total = len(source_claims) covered = len(addressed) + len(partial) + # On full acceptance the appellant prevailed in full — not every sub-claim + # needs individual treatment (the chair noted this for correspondence claims, + # 1033-25). Relax the missing-tolerance accordingly. + allowed_missing_ratio = 0.4 if outcome == "full_acceptance" else 0.2 + return { "name": "claims_coverage", - "passed": len(missing) <= total * 0.2, # Allow up to 20% missing + "passed": len(missing) <= total * allowed_missing_ratio, "errors": errors, "severity": "critical", "details": f"{covered}/{total} טענות נענו ({covered/total*100:.0f}%), {len(partial)} חלקית, {len(missing)} חסרות", @@ -361,8 +376,10 @@ async def validate_decision(case_id: UUID) -> dict: # Get claims claims = await db.get_claims(case_id) - # Determine appeal type + # Determine appeal type + outcome (outcome relaxes claims coverage on full acceptance — #87) appeal_type = case.get("appeal_type", "licensing") + from legal_mcp.services.lessons import canonical_outcome + outcome = canonical_outcome(decision.get("outcome", "") or "") # Run all checks # Run sync checks @@ -370,7 +387,7 @@ async def validate_decision(case_id: UUID) -> dict: check_neutral_background(blocks), ] # Async check: claims coverage with Claude - results.append(await check_claims_coverage(blocks, claims)) + results.append(await check_claims_coverage(blocks, claims, outcome)) # More sync checks results.extend([ check_weight_compliance(blocks, appeal_type), -- 2.49.1