Files
legal-ai/mcp-server/src/legal_mcp/services/embeddings.py
Chaim 6f515dc2cb Initial commit: MCP server + web upload interface
Ezer Mishpati - AI legal decision drafting system with:
- MCP server (FastMCP) with document processing pipeline
- Web upload interface (FastAPI) for file upload and classification
- pgvector-based semantic search
- Hebrew legal document chunking and embedding
2026-03-23 12:33:07 +00:00

56 lines
1.3 KiB
Python

"""Embedding service using Voyage AI API."""
from __future__ import annotations

import asyncio
import logging

import voyageai

from legal_mcp import config
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cached Voyage AI client; stays None until _get_client() first creates it.
_client: voyageai.Client | None = None
def _get_client() -> voyageai.Client:
    """Return the shared Voyage AI client, constructing it on first use.

    The instance is stored in the module-level ``_client`` variable so that
    later calls reuse the same object instead of building a new one.
    """
    global _client
    if _client is not None:
        return _client
    # First call: build the client with the API key taken from config.
    _client = voyageai.Client(api_key=config.VOYAGE_API_KEY)
    return _client
async def embed_texts(texts: list[str], input_type: str = "document") -> list[list[float]]:
    """Embed a list of texts using Voyage AI.

    The input is split into consecutive batches of at most 128 texts; each
    batch is sent in its own request and the results are concatenated in
    input order. Callers may therefore pass a list of any length.

    Args:
        texts: Texts to embed.
        input_type: "document" for indexing, "query" for search queries.

    Returns:
        One embedding vector per input text, in the same order as ``texts``.
        An empty input yields an empty list without contacting the API.
        NOTE(review): vectors were previously documented as 1024-dimensional;
        presumably that depends on ``config.VOYAGE_MODEL`` - confirm.
    """
    if not texts:
        return []

    client = _get_client()
    max_batch = 128  # Voyage AI supports up to 128 texts per batch
    vectors: list[list[float]] = []
    for start in range(0, len(texts), max_batch):
        batch = texts[start : start + max_batch]
        # client.embed is a synchronous, blocking call. Run it in a worker
        # thread so the event loop keeps serving other tasks while we wait
        # for the API response.
        result = await asyncio.to_thread(
            client.embed,
            batch,
            model=config.VOYAGE_MODEL,
            input_type=input_type,
        )
        vectors.extend(result.embeddings)
    return vectors
async def embed_query(query: str) -> list[float]:
    """Embed one search query and return its vector.

    The query is wrapped in a one-element list and passed to ``embed_texts``
    with ``input_type="query"``; the first returned vector is handed back.
    """
    vectors = await embed_texts(texts=[query], input_type="query")
    return vectors[0]