Initial commit: MCP server + web upload interface
Ezer Mishpati - AI legal decision drafting system with:
- MCP server (FastMCP) with document processing pipeline
- Web upload interface (FastAPI) for file upload and classification
- pgvector-based semantic search
- Hebrew legal document chunking and embedding
This commit is contained in:
55
mcp-server/src/legal_mcp/services/embeddings.py
Normal file
55
mcp-server/src/legal_mcp/services/embeddings.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Embedding service using Voyage AI API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import voyageai
|
||||
|
||||
from legal_mcp import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level cache for the Voyage AI client; stays None until
# _get_client() creates the client on its first call.
_client: voyageai.Client | None = None
|
||||
|
||||
|
||||
def _get_client() -> voyageai.Client:
    """Return the shared Voyage AI client, building it on first use.

    The client is constructed with ``config.VOYAGE_API_KEY`` and stored in the
    module-level ``_client`` variable so later calls reuse the same instance.
    """
    global _client
    # Fast path: a client was already created by an earlier call.
    if _client is not None:
        return _client
    _client = voyageai.Client(api_key=config.VOYAGE_API_KEY)
    return _client
|
||||
|
||||
|
||||
async def embed_texts(texts: list[str], input_type: str = "document") -> list[list[float]]:
    """Embed texts using Voyage AI without blocking the event loop.

    The input is split into batches of at most 128 texts, so callers may pass
    a list of any length. Each batch is sent through the synchronous Voyage
    client on a worker thread, one batch at a time.

    Args:
        texts: Texts to embed. An empty list returns ``[]`` immediately,
            without creating a client or calling the API.
        input_type: "document" for indexing, "query" for search queries.

    Returns:
        The embedding vectors collected from every batch, appended in batch
        order (presumably one vector per input text, in input order -- confirm
        against the Voyage client). Vector length depends on
        ``config.VOYAGE_MODEL``.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    if not texts:
        return []

    client = _get_client()
    # Voyage AI supports up to 128 texts per batch
    batch_size = 128
    embeddings: list[list[float]] = []

    for start in range(0, len(texts), batch_size):
        # client.embed is a blocking call; running it directly inside this
        # coroutine would stall every other task on the event loop, so it is
        # handed to a worker thread instead.
        result = await asyncio.to_thread(
            client.embed,
            texts[start : start + batch_size],
            model=config.VOYAGE_MODEL,
            input_type=input_type,
        )
        embeddings.extend(result.embeddings)

    return embeddings
|
||||
|
||||
|
||||
async def embed_query(query: str) -> list[float]:
    """Embed one search query and return its vector.

    Delegates to ``embed_texts`` with a single-element list and
    ``input_type="query"``, then returns the first vector of the result.
    """
    vectors = await embed_texts(texts=[query], input_type="query")
    first_vector = vectors[0]
    return first_vector
|
||||
Reference in New Issue
Block a user