From 9e7492e761b41ae0bca5a282650b962a8bd9424e Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 4 Apr 2026 13:00:34 +0000 Subject: [PATCH] Make classification and reference extraction non-fatal in document pipeline Text extraction, chunking and embedding proceed even if Claude API classification or reference extraction fails (e.g. API quota exceeded). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/legal_mcp/services/processor.py | 88 ++++++++++--------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/processor.py b/mcp-server/src/legal_mcp/services/processor.py index 0cad19a..5ea84bb 100644 --- a/mcp-server/src/legal_mcp/services/processor.py +++ b/mcp-server/src/legal_mcp/services/processor.py @@ -37,37 +37,41 @@ async def process_document(document_id: UUID, case_id: UUID) -> dict: page_count=page_count, ) - # Step 1.5: Classify document and identify parties - logger.info("Classifying document") - case_number = "" - if case_id: - case = await db.get_case(case_id) - if case: - case_number = case.get("case_number", "") - classification_result = await classifier.classify_and_identify(text, case_number) - await db.update_document( - document_id, - metadata=classification_result, - ) - logger.info( - "Classification: %s (confidence: %.2f), parties found: %d appellants, %d respondents", - classification_result["classification"].get("doc_type", "?"), - classification_result["classification"].get("confidence", 0), - len(classification_result["parties"].get("appellants", [])), - len(classification_result["parties"].get("respondents", [])), - ) + # Step 1.5: Classify document and identify parties (non-fatal) + classification_result = {} + try: + logger.info("Classifying document") + case_number = "" + if case_id: + case = await db.get_case(case_id) + if case: + case_number = case.get("case_number", "") + classification_result = await classifier.classify_and_identify(text, case_number) + await db.update_document( + document_id, + metadata=classification_result, + ) + logger.info( + "Classification: %s (confidence: %.2f), parties found: %d appellants, %d respondents", + classification_result["classification"].get("doc_type", "?"), + classification_result["classification"].get("confidence", 0), + len(classification_result["parties"].get("appellants", [])), + len(classification_result["parties"].get("respondents", [])), + ) - # Step 1.6: Update case parties if empty - if case_id and case: - parties = classification_result.get("parties", {}) - updates = {} - if not case.get("appellants") and parties.get("appellants"): - updates["appellants"] = parties["appellants"] - if not case.get("respondents") and parties.get("respondents"): - updates["respondents"] = parties["respondents"] - if updates: - await db.update_case(case_id, **updates) - logger.info("Updated case parties: %s", updates) + # Update case parties if empty + if case_id and case: + parties = classification_result.get("parties", {}) + updates = {} + if not case.get("appellants") and parties.get("appellants"): + updates["appellants"] = parties["appellants"] + if not case.get("respondents") and parties.get("respondents"): + updates["respondents"] = parties["respondents"] + if updates: + await db.update_case(case_id, **updates) + logger.info("Updated case parties: %s", updates) + except Exception as e: + logger.warning("Classification failed (non-fatal): %s", e) # Step 2: Chunk logger.info("Chunking document (%d chars)", len(text)) @@ -96,16 +100,20 @@ async def process_document(document_id: UUID, case_id: UUID) -> dict: stored = await db.store_chunks(document_id, case_id, chunk_dicts) - # Step 5: Extract references (plans, case law, legislation) - logger.info("Extracting legal references") - refs_result = await references_extractor.extract_and_link_references( - document_id, case_id, text, - ) - logger.info( - "References found: %d plans, %d case law (%d linked), %d legislation", - refs_result["plans"], refs_result["case_law"], - refs_result["case_law_linked"], refs_result["legislation"], - ) + # Step 5: Extract references (plans, case law, legislation) — non-fatal + refs_result = {"plans": 0, "case_law": 0, "case_law_linked": 0, "legislation": 0} + try: + logger.info("Extracting legal references") + refs_result = await references_extractor.extract_and_link_references( + document_id, case_id, text, + ) + logger.info( + "References found: %d plans, %d case law (%d linked), %d legislation", + refs_result["plans"], refs_result["case_law"], + refs_result["case_law_linked"], refs_result["legislation"], + ) + except Exception as e: + logger.warning("Reference extraction failed (non-fatal): %s", e) await db.update_document(document_id, extraction_status="completed")