From d05c1e3fce957790e6b129c940d59bbf01fe9438 Mon Sep 17 00:00:00 2001 From: Chaim Date: Thu, 11 Jun 2026 11:49:35 +0000 Subject: [PATCH] =?UTF-8?q?fix(extractors):=20disable=20tools=20on=20text?= =?UTF-8?q?=E2=86=92JSON=20claude=5Fsession=20calls=20(no=20error=5Fmax=5F?= =?UTF-8?q?turns)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit כל קריאות text→JSON ב-9 המחלצים העבירו את ברירת-המחדל של ה-CLI (כל הכלים פעילים). המודל פלט מדי פעם stop_reason:"tool_use", מה שמפיל את --max-turns 1 ל-error_max_turns ומאלץ retry — ~$0.12-0.16 לניסיון, × 3. נצפה ב-drain חילוץ-ההלכות (legal-halacha-drain, ‎15 כשלי error_max_turns ב-error.log). התשתית כבר קיימת: claude_session.query מקבל tools=""‎ לנטרול כל הכלים, ושני מחלצים (digest_metadata_extractor, bulletin_splitter) כבר משתמשים בו. כאן רק מיישרים את שאר המחלצים לאותו מסלול קנוני — אף קריאת חילוץ/שיפוט/סיווג טהורה לא צריכה כלי. מתוקנים (11 קריאות, 9 קבצים): halacha_extractor (×3: extract/NLI/consolidate), corroboration, claims_extractor, argument_aggregator, appraiser_facts_extractor, learning_loop, qa_validator, brainstorm, style_metadata_extractor. Invariants: מקיים INV-G2 (מסלול קנוני יחיד; סימטריה בין מחלצים-אחים) — לא מסלול מקביל חדש אלא שימוש עקבי בפרמטר הקיים. אין בליעה שקטה (§6) — נתיבי הכשל/retry נשמרים. ללא שינוי-ספ. בדיקות: 60/60 ב-tests/test_halacha_coerce.py + test_halacha_quality.py עוברות; py_compile נקי על כל 9 הקבצים. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py | 2 +- mcp-server/src/legal_mcp/services/argument_aggregator.py | 2 +- mcp-server/src/legal_mcp/services/brainstorm.py | 2 +- mcp-server/src/legal_mcp/services/claims_extractor.py | 2 +- mcp-server/src/legal_mcp/services/corroboration.py | 1 + mcp-server/src/legal_mcp/services/halacha_extractor.py | 3 +++ mcp-server/src/legal_mcp/services/learning_loop.py | 2 +- mcp-server/src/legal_mcp/services/qa_validator.py | 2 +- mcp-server/src/legal_mcp/services/style_metadata_extractor.py | 2 +- 9 files changed, 11 insertions(+), 7 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py b/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py index c07571d..b2594f1 100644 --- a/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py +++ b/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py @@ -103,7 +103,7 @@ async def extract_facts_from_document( f"שמאי: {appraiser_name}{chunk_label}\n\n" f"--- תחילת שומה ---\n{chunk}\n--- סוף שומה ---" ) - result = await claude_session.query_json(prompt) + result = await claude_session.query_json(prompt, tools="") # no tool_use → no error_max_turns if not isinstance(result, list): logger.warning( "extract_facts_from_document: chunk %d returned non-list (%s) for doc=%s", diff --git a/mcp-server/src/legal_mcp/services/argument_aggregator.py b/mcp-server/src/legal_mcp/services/argument_aggregator.py index 7f5838d..25c8b4f 100644 --- a/mcp-server/src/legal_mcp/services/argument_aggregator.py +++ b/mcp-server/src/legal_mcp/services/argument_aggregator.py @@ -147,7 +147,7 @@ async def _aggregate_party( prompt = _build_prompt(party, propositions) try: - raw_result = await claude_session.query_json(prompt) + raw_result = await claude_session.query_json(prompt, tools="") # no tool_use → no error_max_turns except RuntimeError as e: # Surface CLI-unavailable specifically so the caller can report # cleanly instead of crashing the whole job. diff --git a/mcp-server/src/legal_mcp/services/brainstorm.py b/mcp-server/src/legal_mcp/services/brainstorm.py index 174bc93..08f8d40 100644 --- a/mcp-server/src/legal_mcp/services/brainstorm.py +++ b/mcp-server/src/legal_mcp/services/brainstorm.py @@ -134,7 +134,7 @@ async def generate_directions( {doc_context or '(אין מסמכים בתיק)'} """ - result = await claude_session.query_json(user_content) + result = await claude_session.query_json(user_content, tools="") # no tool_use → no error_max_turns if result is None: logger.warning("Failed to parse brainstorm response") return { diff --git a/mcp-server/src/legal_mcp/services/claims_extractor.py b/mcp-server/src/legal_mcp/services/claims_extractor.py index e7c9416..add1204 100644 --- a/mcp-server/src/legal_mcp/services/claims_extractor.py +++ b/mcp-server/src/legal_mcp/services/claims_extractor.py @@ -135,7 +135,7 @@ async def _extract_chunk( last_err: Exception | None = None for attempt in range(CHUNK_RETRY_ATTEMPTS + 1): try: - claims = await claude_session.query_json(prompt) + claims = await claude_session.query_json(prompt, tools="") # no tool_use → no error_max_turns except Exception as e: last_err = e logger.warning( diff --git a/mcp-server/src/legal_mcp/services/corroboration.py b/mcp-server/src/legal_mcp/services/corroboration.py index 91deb43..8a4e2c2 100644 --- a/mcp-server/src/legal_mcp/services/corroboration.py +++ b/mcp-server/src/legal_mcp/services/corroboration.py @@ -88,6 +88,7 @@ async def classify_treatment(cited_citation: str, context: str) -> str: user, system=_TREATMENT_PROMPT, model=config.HALACHA_EXTRACT_MODEL or None, effort=config.HALACHA_EXTRACT_EFFORT or None, + tools="", # pure text→JSON — no tool_use → no error_max_turns ) except Exception as e: logger.warning("classify_treatment failed: %s", e) diff --git a/mcp-server/src/legal_mcp/services/halacha_extractor.py b/mcp-server/src/legal_mcp/services/halacha_extractor.py index 33f73b7..2d232bb 100644 --- a/mcp-server/src/legal_mcp/services/halacha_extractor.py +++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py @@ -309,6 +309,7 @@ async def _nli_check(items: list[dict]) -> list[str]: system=halacha_quality.NLI_SYSTEM, model=config.HALACHA_NLI_MODEL or None, effort=config.HALACHA_NLI_EFFORT or None, + tools="", # pure text→JSON — no tool_use → no error_max_turns ) except Exception as e: logger.warning("halacha NLI check failed (fail-open, no flags): %s", e) @@ -352,6 +353,7 @@ async def _consolidate_precedent(case_law_id: UUID) -> int: system=halacha_quality.CONSOLIDATE_SYSTEM, model=config.HALACHA_CONSOLIDATE_MODEL or None, effort=config.HALACHA_CONSOLIDATE_EFFORT or None, + tools="", # pure text→JSON — no tool_use → no error_max_turns ) groups = halacha_quality.parse_fold_groups(raw) if not groups: @@ -423,6 +425,7 @@ async def _extract_chunk( system=base_prompt, model=config.HALACHA_EXTRACT_MODEL or None, effort=(effort or config.HALACHA_EXTRACT_EFFORT) or None, + tools="", # pure text→JSON — no tool_use → no error_max_turns ) except Exception as e: last_err = e diff --git a/mcp-server/src/legal_mcp/services/learning_loop.py b/mcp-server/src/legal_mcp/services/learning_loop.py index 625a3ba..14fa9a7 100644 --- a/mcp-server/src/legal_mcp/services/learning_loop.py +++ b/mcp-server/src/legal_mcp/services/learning_loop.py @@ -89,7 +89,7 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict: --- גרסה סופית --- {final_sample} """ - result = await claude_session.query_json(prompt) + result = await claude_session.query_json(prompt, tools="") # no tool_use → no error_max_turns if result is None: logger.warning("Failed to parse lessons response") return {"changes": [], "new_expressions": [], "overall_assessment": ""} diff --git a/mcp-server/src/legal_mcp/services/qa_validator.py b/mcp-server/src/legal_mcp/services/qa_validator.py index f854912..97b90f0 100644 --- a/mcp-server/src/legal_mcp/services/qa_validator.py +++ b/mcp-server/src/legal_mcp/services/qa_validator.py @@ -154,7 +154,7 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict], outcome: ## בלוק הדיון: {discussion}""" - parsed = await claude_session.query_json(prompt) + parsed = await claude_session.query_json(prompt, tools="") # no tool_use → no error_max_turns if parsed is None: logger.warning("Failed to parse claims check") # Fallback: assume all covered (don't block export on parse failure) diff --git a/mcp-server/src/legal_mcp/services/style_metadata_extractor.py b/mcp-server/src/legal_mcp/services/style_metadata_extractor.py index 0f5d466..68846db 100644 --- a/mcp-server/src/legal_mcp/services/style_metadata_extractor.py +++ b/mcp-server/src/legal_mcp/services/style_metadata_extractor.py @@ -119,7 +119,7 @@ async def extract_decision_metadata(corpus_id: UUID | str) -> dict: ) try: - result = await claude_session.query_json(user_msg, system=METADATA_PROMPT) + result = await claude_session.query_json(user_msg, system=METADATA_PROMPT, tools="") # no tool_use → no error_max_turns except Exception as e: logger.warning("style_metadata_extractor: query failed: %s", e) return {}