Add full decision writing pipeline: classify, extract, brainstorm, write, QA, export

New services (11 files):
- classifier.py: auto doc-type classification + party identification (Claude Haiku)
- claims_extractor.py: claim extraction from pleadings (Claude Sonnet + regex)
- references_extractor.py: plan/case-law/legislation detection (regex)
- brainstorm.py: direction generation with 2-3 options (Claude Sonnet)
- block_writer.py: 12-block decision writer (template + Claude Sonnet/Opus)
- docx_exporter.py: DOCX export with David font, RTL, headings
- qa_validator.py: 6 QA checks with export blocking on critical failure
- learning_loop.py: draft vs final comparison + lesson extraction
- metrics.py: KPIs dashboard per case and global
- audit.py: action audit log
- cli.py: standalone CLI with 11 commands

Updated pipeline: extract → classify → chunk → embed → store → extract_references
New MCP tools: 29 total (was 16)
New DB tables: audit_log, decisions CRUD, claims CRUD
Config: Infisical support, external service allowlist

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-03 10:21:47 +00:00
parent df7cc4f5a5
commit d9e5ef0f46
21 changed files with 3957 additions and 14 deletions

View File

@@ -15,7 +15,7 @@ from legal_mcp.services import db, processor
async def document_upload(
case_number: str,
file_path: str,
doc_type: str = "appeal",
doc_type: str = "auto",
title: str = "",
) -> str:
"""העלאה ועיבוד מסמך לתיק ערר. מחלץ טקסט, יוצר chunks ו-embeddings.
@@ -23,7 +23,7 @@ async def document_upload(
Args:
case_number: מספר תיק הערר
file_path: נתיב מלא לקובץ (PDF, DOCX, RTF, TXT)
doc_type: סוג מסמך (appeal=כתב ערר, response=תשובה, decision=החלטה, reference=מסמך עזר, exhibit=נספח)
doc_type: סוג מסמך (auto=סיווג אוטומטי, appeal=כתב ערר, response=תשובה, protocol=פרוטוקול, plan=תכנית, permit=היתר, court_decision=פסק דין, decision=החלטת ועדה, appraisal=שומה, objection=התנגדות, exhibit=נספח, reference=מסמך עזר)
title: שם המסמך (אם ריק, ייקח משם הקובץ)
"""
case = await db.get_case_by_number(case_number)
@@ -44,17 +44,29 @@ async def document_upload(
dest = case_dir / source.name
shutil.copy2(str(source), str(dest))
# For auto classification, start with "reference" — will be updated after processing
initial_doc_type = doc_type if doc_type != "auto" else "reference"
# Create document record
doc = await db.create_document(
case_id=case_id,
doc_type=doc_type,
doc_type=initial_doc_type,
title=title,
file_path=str(dest),
)
# Process document (extract → chunk → embed → store)
# Process document (extract → classify → chunk → embed → store)
result = await processor.process_document(UUID(doc["id"]), case_id)
# If auto-classification, update doc_type from classification result
actual_doc_type = initial_doc_type
if doc_type == "auto" and result.get("classification"):
classified_type = result["classification"].get("classification", {}).get("doc_type", "")
if classified_type:
actual_doc_type = classified_type
await db.update_document(UUID(doc["id"]), doc_type=classified_type)
doc["doc_type"] = classified_type
# Git commit
repo_dir = config.CASES_DIR / case_number
if repo_dir.exists():
@@ -62,10 +74,16 @@ async def document_upload(
doc_type_hebrew = {
"appeal": "כתב ערר",
"response": "תשובה",
"protocol": "פרוטוקול",
"plan": "תכנית",
"permit": "היתר",
"court_decision": "פסק דין",
"decision": "החלטה",
"reference": "מסמך עזר",
"appraisal": "שומה",
"objection": "התנגדות",
"exhibit": "נספח",
}.get(doc_type, doc_type)
"reference": "מסמך עזר",
}.get(actual_doc_type, actual_doc_type)
subprocess.run(
["git", "commit", "-m", f"הוספת {doc_type_hebrew}: {title}"],
cwd=repo_dir,
@@ -216,3 +234,135 @@ async def document_list(case_number: str) -> str:
return f"אין מסמכים בתיק {case_number}."
return json.dumps(docs, default=str, ensure_ascii=False, indent=2)
async def extract_references(
case_number: str,
doc_title: str = "",
) -> str:
"""זיהוי תכניות, פסיקה וחקיקה מתוך מסמכי תיק.
Args:
case_number: מספר תיק הערר
doc_title: שם מסמך ספציפי (אם ריק, מזהה בכל המסמכים)
"""
from legal_mcp.services import references_extractor
case = await db.get_case_by_number(case_number)
if not case:
return f"תיק {case_number} לא נמצא."
case_id = UUID(case["id"])
docs = await db.list_documents(case_id)
if not docs:
return f"אין מסמכים בתיק {case_number}."
if doc_title:
docs = [d for d in docs if doc_title.lower() in d["title"].lower()]
results = []
for doc in docs:
text = await db.get_document_text(UUID(doc["id"]))
if not text:
continue
refs = await references_extractor.extract_and_link_references(
UUID(doc["id"]), case_id, text,
)
results.append({
"document": doc["title"],
"plans": refs["plans"],
"case_law": refs["case_law"],
"case_law_linked": refs["case_law_linked"],
"legislation": refs["legislation"],
})
return json.dumps(results, default=str, ensure_ascii=False, indent=2)
async def extract_claims(
case_number: str,
doc_title: str = "",
party_hint: str = "",
) -> str:
"""חילוץ טענות מכתב טענות בתיק ושמירה ב-DB.
Args:
case_number: מספר תיק הערר
doc_title: שם מסמך ספציפי (אם ריק, מחלץ מכל כתבי הטענות)
party_hint: שם הצד המגיש (אם ידוע)
"""
from legal_mcp.services import claims_extractor
case = await db.get_case_by_number(case_number)
if not case:
return f"תיק {case_number} לא נמצא."
case_id = UUID(case["id"])
docs = await db.list_documents(case_id)
if not docs:
return f"אין מסמכים בתיק {case_number}."
# Filter to claims documents (appeal, response) or specific doc
if doc_title:
docs = [d for d in docs if doc_title.lower() in d["title"].lower()]
else:
docs = [d for d in docs if d["doc_type"] in ("appeal", "response", "objection")]
if not docs:
return "לא נמצאו כתבי טענות בתיק."
results = []
for doc in docs:
text = await db.get_document_text(UUID(doc["id"]))
if not text:
continue
result = await claims_extractor.extract_and_store_claims(
case_id=case_id,
document_id=UUID(doc["id"]),
text=text,
doc_type=doc["doc_type"],
party_hint=party_hint,
)
results.append(result)
return json.dumps(results, default=str, ensure_ascii=False, indent=2)
async def get_claims(case_number: str, party_role: str = "") -> str:
"""שליפת טענות שחולצו לתיק.
Args:
case_number: מספר תיק הערר
party_role: סינון לפי צד (appellant/respondent/committee/permit_applicant). ריק = הכל.
"""
case = await db.get_case_by_number(case_number)
if not case:
return f"תיק {case_number} לא נמצא."
claims = await db.get_claims(
UUID(case["id"]),
party_role=party_role if party_role else None,
)
if not claims:
return f"אין טענות בתיק {case_number}."
# Format for display
role_hebrew = {
"appellant": "עוררים",
"respondent": "משיבים",
"committee": "ועדה מקומית",
"permit_applicant": "מבקשי היתר",
"appraiser": "שמאי",
}
formatted = []
for c in claims:
formatted.append({
"party": role_hebrew.get(c["party_role"], c["party_role"]),
"claim": c["claim_text"],
"source": c.get("source_document", ""),
})
return json.dumps(formatted, default=str, ensure_ascii=False, indent=2)