Initial commit: MCP server + web upload interface
Ezer Mishpati - an AI legal decision drafting system with: (1) an MCP server (FastMCP) with a document processing pipeline; (2) a web upload interface (FastAPI) for file upload and classification; (3) pgvector-based semantic search; and (4) Hebrew legal document chunking and embedding.
This commit is contained in:
124
mcp-server/src/legal_mcp/tools/search.py
Normal file
124
mcp-server/src/legal_mcp/tools/search.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""MCP tools for RAG search over legal documents and decisions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db, embeddings
|
||||
|
||||
|
||||
async def search_decisions(
    query: str,
    limit: int = 10,
    section_type: str = "",
) -> str:
    """Run a semantic search over prior decisions and documents.

    The query is embedded with ``embeddings.embed_query`` and the resulting
    vector is handed to ``db.search_similar``.

    Args:
        query: Search query in Hebrew
            (for example: "שימוש חורג למסחר באזור מגורים").
        limit: Maximum number of results to return.
        section_type: Filter by section type (facts, legal_analysis,
            conclusion, ruling, etc.). An empty string means no filtering.

    Returns:
        A pretty-printed JSON array (non-ASCII characters left unescaped)
        with one object per hit, holding ``score`` (rounded to 4 decimals),
        ``case_number``, ``document``, ``section``, ``page`` and ``content``.
        When there is nothing to return, a Hebrew "no results" message is
        returned instead of JSON.
    """
    no_results = "לא נמצאו תוצאות."

    # A blank query has nothing to match on and a non-positive limit cannot
    # yield rows, so answer right away instead of spending an embedding call
    # and a database round trip (and never forward a negative limit).
    if limit <= 0 or not query or not query.strip():
        return no_results

    query_emb = await embeddings.embed_query(query)
    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        # The search layer receives None, not "", when no filter is wanted.
        section_type=section_type or None,
    )
    if not results:
        return no_results

    formatted = [
        {
            "score": round(float(hit["score"]), 4),
            "case_number": hit["case_number"],
            "document": hit["document_title"],
            "section": hit["section_type"],
            "page": hit["page_number"],
            "content": hit["content"],
        }
        for hit in results
    ]
    # ensure_ascii=False keeps Hebrew text readable in the tool output.
    return json.dumps(formatted, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def search_case_documents(
    case_number: str,
    query: str,
    limit: int = 10,
) -> str:
    """Run a semantic search restricted to the documents of one case.

    The case is looked up with ``db.get_case_by_number``; its ``id`` is then
    used to scope ``db.search_similar`` to that case.

    Args:
        case_number: The appeal case number.
        query: Search query.
        limit: Maximum number of results to return.

    Returns:
        A pretty-printed JSON array (non-ASCII characters left unescaped)
        with one object per hit, holding ``score`` (rounded to 4 decimals),
        ``document``, ``section``, ``page`` and ``content``. A Hebrew message
        is returned instead when the case does not exist or when there are
        no results for it.
    """
    case = await db.get_case_by_number(case_number)
    if not case:
        return f"תיק {case_number} לא נמצא."

    no_results = f"לא נמצאו תוצאות בתיק {case_number}."

    # Checked only after the case lookup so that "case not found" keeps
    # priority. A blank query or non-positive limit cannot produce hits, so
    # skip the embedding call and never forward a negative limit.
    if limit <= 0 or not query or not query.strip():
        return no_results

    # NOTE(review): the DB layer is not visible here. UUID(<UUID instance>)
    # raises AttributeError, so accept an id that is already a UUID as well
    # as its string form.
    raw_id = case["id"]
    case_id = raw_id if isinstance(raw_id, UUID) else UUID(str(raw_id))

    query_emb = await embeddings.embed_query(query)
    results = await db.search_similar(
        query_embedding=query_emb,
        limit=limit,
        case_id=case_id,
    )
    if not results:
        return no_results

    formatted = [
        {
            "score": round(float(hit["score"]), 4),
            "document": hit["document_title"],
            "section": hit["section_type"],
            "page": hit["page_number"],
            "content": hit["content"],
        }
        for hit in results
    ]
    # ensure_ascii=False keeps Hebrew text readable in the tool output.
    return json.dumps(formatted, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def find_similar_cases(
    description: str,
    limit: int = 5,
) -> str:
    """Find similar cases based on a free-text description.

    The description is embedded with ``embeddings.embed_query`` and matched
    via ``db.search_similar``; hits are then collapsed to one entry per case
    number, keeping the highest-scoring hit for each case.

    Args:
        description: Description of the case or topic (for example:
            "ערר על סירוב להיתר בנייה לתוספת קומה").
        limit: Maximum number of cases to return.

    Returns:
        A pretty-printed JSON array (non-ASCII characters left unescaped),
        ordered by descending score, with one object per case holding
        ``score`` (rounded to 4 decimals), ``case_number``, ``document`` and
        ``relevant_section`` (the first 500 characters of the best hit's
        content). When there is nothing to return, a Hebrew "no similar
        cases" message is returned instead of JSON.
    """
    no_results = "לא נמצאו תיקים דומים."

    # A non-positive limit would otherwise become a non-positive DB limit and
    # an unintended negative slice below; a blank description has nothing to
    # match on. Answer right away without an embedding call.
    if limit <= 0 or not description or not description.strip():
        return no_results

    query_emb = await embeddings.embed_query(description)
    results = await db.search_similar(
        query_embedding=query_emb,
        # Over-fetch because several hits may belong to the same case. Fewer
        # than `limit` cases can still come back if the hits cluster heavily.
        limit=limit * 3,
    )
    if not results:
        return no_results

    # Deduplicate by case number, keeping the best-scoring hit per case.
    best_by_case = {}
    for hit in results:
        case_number = hit["case_number"]
        current = best_by_case.get(case_number)
        if current is None or hit["score"] > current["score"]:
            best_by_case[case_number] = hit

    # Rank the per-case winners by score and keep the requested amount.
    top_cases = sorted(
        best_by_case.values(),
        key=lambda hit: hit["score"],
        reverse=True,
    )[:limit]

    formatted = [
        {
            "score": round(float(hit["score"]), 4),
            "case_number": hit["case_number"],
            "document": hit["document_title"],
            "relevant_section": hit["content"][:500],
        }
        for hit in top_cases
    ]
    # ensure_ascii=False keeps Hebrew text readable in the tool output.
    return json.dumps(formatted, ensure_ascii=False, indent=2)
|
||||
Reference in New Issue
Block a user