Benchmark results on Hebrew legal docs (case 1130-25):
- Google Vision: 1s/page, $0.001/page, high accuracy
- Claude Opus Vision: 90s/page, $0.05/page, poor accuracy
- PyMuPDF broken OCR layers now detected via quality check

Changes:
- extractor.py: Google Vision OCR with Hebrew language hint (300 DPI)
- extractor.py: text quality detection (word length, words-per-line, Hebrew ratio)
- extractor.py: Hebrew abbreviation quote fixer (15 known patterns)
- config.py: add GOOGLE_CLOUD_VISION_API_KEY, remove ANTHROPIC_API_KEY
- pyproject.toml: add google-cloud-vision, remove anthropic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
28 lines
607 B
TOML
28 lines
607 B
TOML
# Core package metadata for the "legal-mcp" distribution.
[project]
name = "legal-mcp"
version = "0.1.0"
description = "MCP server for AI-assisted legal decision drafting"
requires-python = ">=3.10"
# Runtime requirements, one requirement string per line, each pinned to a
# minimum version only (no upper bounds).
dependencies = [
    "mcp[cli]>=1.0.0",
    "asyncpg>=0.29.0",
    "pgvector>=0.3.0",
    "voyageai>=0.3.0",
    "python-dotenv>=1.0.0",
    "pydantic>=2.0.0",
    "pymupdf>=1.25.0",
    "python-docx>=1.1.0",
    "striprtf>=0.0.26",
    "redis>=5.0.0",
    "rq>=1.16.0",
    "pillow>=10.0.0",
    "google-cloud-vision>=3.7.0",
]

# Build backend declaration: the package is built with setuptools (>= 68.0).
[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"

# Package discovery: setuptools searches the "src" directory for packages.
[tool.setuptools.packages.find]
where = ["src"]