Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export

New services (11 files): - classifier.py: auto doc-type classification + party identification (Claude Haiku) - claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex) - references_extractor.py: plan/case-law/legislation detection (regex) - brainstorm.py: direction generation with 2-3 options (Claude Sonnet) - block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus) - docx_exporter.py: DOCX export with David font, RTL, headings - qa_validator.py: 6 QA checks with export blocking on critical failure - learning_loop.py: draft vs final comparison + lesson extraction - metrics.py: KPIs dashboard per case and global - audit.py: action audit log - cli.py: standalone CLI with 11 commands Updated pipeline: extract → classify → chunk → embed → store → extract_references New MCP tools: 29 total (was 16) New DB tables: audit_log, decisions CRUD, claims CRUD Config: Infisical support, external service allowlist Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 10:21:47 +00:00
parent df7cc4f5a5
commit d9e5ef0f46
21 changed files with 3957 additions and 14 deletions
--- a/mcp-server/src/legal_mcp/tools/documents.py
+++ b/mcp-server/src/legal_mcp/tools/documents.py
@@ -15,7 +15,7 @@ from legal_mcp.services import db, processor
 async def document_upload(
    case_number: str,
    file_path: str,
-    doc_type: str = "appeal",
+    doc_type: str = "auto",
    title: str = "",
 ) -> str:
    """העלאה ועיבוד מסמך לתיק ערר. מחלץ טקסט, יוצר chunks ו-embeddings.
@@ -23,7 +23,7 @@ async def document_upload(
    Args:
        case_number: מספר תיק הערר
        file_path: נתיב מלא לקובץ (PDF, DOCX, RTF, TXT)
-        doc_type: סוג מסמך (appeal=כתב ערר, response=תשובה, decision=החלטה, reference=מסמך עזר, exhibit=נספח)
+        doc_type: סוג מסמך (auto=סיווג אוטומטי, appeal=כתב ערר, response=תשובה, protocol=פרוטוקול, plan=תכנית, permit=היתר, court_decision=פסק דין, decision=החלטת ועדה, appraisal=שומה, objection=התנגדות, exhibit=נספח, reference=מסמך עזר)
        title: שם המסמך (אם ריק, ייקח משם הקובץ)
    """
    case = await db.get_case_by_number(case_number)
@@ -44,17 +44,29 @@ async def document_upload(
    dest = case_dir / source.name
    shutil.copy2(str(source), str(dest))

+    # For auto classification, start with "reference" — will be updated after processing
+    initial_doc_type = doc_type if doc_type != "auto" else "reference"
+
    # Create document record
    doc = await db.create_document(
        case_id=case_id,
-        doc_type=doc_type,
+        doc_type=initial_doc_type,
        title=title,
        file_path=str(dest),
    )

-    # Process document (extract → chunk → embed → store)
+    # Process document (extract → classify → chunk → embed → store)
    result = await processor.process_document(UUID(doc["id"]), case_id)

+    # If auto-classification, update doc_type from classification result
+    actual_doc_type = initial_doc_type
+    if doc_type == "auto" and result.get("classification"):
+        classified_type = result["classification"].get("classification", {}).get("doc_type", "")
+        if classified_type:
+            actual_doc_type = classified_type
+            await db.update_document(UUID(doc["id"]), doc_type=classified_type)
+            doc["doc_type"] = classified_type
+
    # Git commit
    repo_dir = config.CASES_DIR / case_number
    if repo_dir.exists():
@@ -62,10 +74,16 @@ async def document_upload(
        doc_type_hebrew = {
            "appeal": "כתב ערר",
            "response": "תשובה",
+            "protocol": "פרוטוקול",
+            "plan": "תכנית",
+            "permit": "היתר",
+            "court_decision": "פסק דין",
            "decision": "החלטה",
-            "reference": "מסמך עזר",
+            "appraisal": "שומה",
+            "objection": "התנגדות",
            "exhibit": "נספח",
-        }.get(doc_type, doc_type)
+            "reference": "מסמך עזר",
+        }.get(actual_doc_type, actual_doc_type)
        subprocess.run(
            ["git", "commit", "-m", f"הוספת {doc_type_hebrew}: {title}"],
            cwd=repo_dir,
@@ -216,3 +234,135 @@ async def document_list(case_number: str) -> str:
        return f"אין מסמכים בתיק {case_number}."

    return json.dumps(docs, default=str, ensure_ascii=False, indent=2)
+
+
+async def extract_references(case_number: str, doc_title: str = "") -> str:
+    """Detect plans, case law and legislation cited in a case's documents.
+
+    Args:
+        case_number: Appeal case number.
+        doc_title: Case-insensitive title substring; empty scans every document.
+
+    Returns:
+        A JSON array with one entry per document that has stored text, or a
+        Hebrew message when the case is missing or has no documents.
+    """
+    # Imported inside the function rather than at module level.
+    from legal_mcp.services import references_extractor
+
+    case_row = await db.get_case_by_number(case_number)
+    if not case_row:
+        return f"תיק {case_number} לא נמצא."
+
+    case_id = UUID(case_row["id"])
+    candidates = await db.list_documents(case_id)
+    if not candidates:
+        return f"אין מסמכים בתיק {case_number}."
+
+    # Optional narrowing to documents whose title contains doc_title.
+    if doc_title:
+        needle = doc_title.lower()
+        candidates = [d for d in candidates if needle in d["title"].lower()]
+
+    ref_keys = ("plans", "case_law", "case_law_linked", "legislation")
+    report = []
+    for entry in candidates:
+        doc_id = UUID(entry["id"])
+        body = await db.get_document_text(doc_id)
+        if body:  # documents without stored text are skipped
+            found = await references_extractor.extract_and_link_references(
+                doc_id, case_id, body,
+            )
+            report.append(
+                {"document": entry["title"], **{k: found[k] for k in ref_keys}}
+            )
+
+    return json.dumps(report, default=str, ensure_ascii=False, indent=2)
+
+
+async def extract_claims(
+    case_number: str,
+    doc_title: str = "",
+    party_hint: str = "",
+) -> str:
+    """Extract claims from the pleadings of a case and store them in the DB.
+
+    Args:
+        case_number: Appeal case number.
+        doc_title: Title substring filter; empty selects all pleading documents.
+        party_hint: Name of the filing party, if known.
+    """
+    from legal_mcp.services import claims_extractor
+
+    case_row = await db.get_case_by_number(case_number)
+    if not case_row:
+        return f"תיק {case_number} לא נמצא."
+
+    case_id = UUID(case_row["id"])
+    all_docs = await db.list_documents(case_id)
+    if not all_docs:
+        return f"אין מסמכים בתיק {case_number}."
+
+    # An explicit title wins; otherwise keep only the pleading doc types.
+    if doc_title:
+        needle = doc_title.lower()
+        selected = [d for d in all_docs if needle in d["title"].lower()]
+    else:
+        pleading_types = ("appeal", "response", "objection")
+        selected = [d for d in all_docs if d["doc_type"] in pleading_types]
+    if not selected:
+        return "לא נמצאו כתבי טענות בתיק."
+
+    extracted = []
+    for entry in selected:
+        doc_id = UUID(entry["id"])
+        body = await db.get_document_text(doc_id)
+        if body:  # nothing to extract from a document without stored text
+            outcome = await claims_extractor.extract_and_store_claims(
+                case_id=case_id,
+                document_id=doc_id,
+                text=body,
+                doc_type=entry["doc_type"],
+                party_hint=party_hint,
+            )
+            extracted.append(outcome)
+
+    return json.dumps(extracted, default=str, ensure_ascii=False, indent=2)
+
+
+async def get_claims(case_number: str, party_role: str = "") -> str:
+    """Return the claims already extracted for a case as display-ready JSON.
+
+    Args:
+        case_number: Appeal case number.
+        party_role: Party filter (appellant/respondent/committee/
+            permit_applicant); an empty string returns every party.
+    """
+    case_row = await db.get_case_by_number(case_number)
+    if not case_row:
+        return f"תיק {case_number} לא נמצא."
+
+    # An empty filter is passed to the DB layer as None.
+    role_filter = party_role or None
+    rows = await db.get_claims(UUID(case_row["id"]), party_role=role_filter)
+    if not rows:
+        return f"אין טענות בתיק {case_number}."
+
+    # Hebrew display labels; roles missing from the map are shown unchanged.
+    labels = {
+        "appellant": "עוררים",
+        "respondent": "משיבים",
+        "committee": "ועדה מקומית",
+        "permit_applicant": "מבקשי היתר",
+        "appraiser": "שמאי",
+    }
+    display = [
+        {
+            "party": labels.get(row["party_role"], row["party_role"]),
+            "claim": row["claim_text"],
+            "source": row.get("source_document", ""),
+        }
+        for row in rows
+    ]
+
+    return json.dumps(display, default=str, ensure_ascii=False, indent=2)