# legal-ai/scripts/test_retrieval_by_name.py

#!/usr/bin/env python
"""Repro + regression test for retrieval-by-name (RC-A, tasks #52).

Bug: searching the precedent corpus by a bare case NAME ("אגסי") fails to
surface the decision itself, because the lexical tsvector covers only chunk
content + halacha text — not case_name / case_number. A name query therefore
matches decisions that *cite* the case, not the case.

Run with the MCP venv:
    DOTENV_PATH=/home/chaim/.env DATA_DIR=/home/chaim/legal-ai/data \
      mcp-server/.venv/bin/python scripts/test_retrieval_by_name.py

Exit 0 = all assertions pass. Non-zero = failure (prints what was found).
"""
import asyncio
import sys

sys.path.insert(0, "/home/chaim/legal-ai/mcp-server/src")

from legal_mcp.services import embeddings, hybrid_search  # noqa: E402

# case_law_id of the Agasi decision: the row a bare-name search must surface.
AGASI_ID = "1a87efe5-6e13-4ed4-a9ec-3f2f7d61e4ec"
# Vinfeld CITES Agasi (its halacha quote names אגסי) but is NOT Agasi.
# An exact name match must rank the case itself above any case citing it.
VINFELD_ID = "bd5d849c-c15f-43c3-96ab-d44337af9cb5"
# Bare case name ("Agasi" in Hebrew) used for the retrieval-by-name check.
NAME_QUERY = "אגסי"
# Topical (non-name) query used by the regression checks. Approximate
# translation: "betterment levy exemption under section 19(c)(1), two
# apartments, 140 sqm, one rented".
SUBSTANTIVE_QUERY = 'פטור היטל השבחה לפי סעיף 19(ג)(1) שתי דירות 140 מ"ר אחת מושכרת'


def _ids(rows):
    return [str(r.get("case_law_id")) for r in rows]


def _rank_of(rows, cid):
    for i, r in enumerate(rows, 1):
        if str(r.get("case_law_id")) == cid:
            return i
    return None


async def _search(query, source_kind, limit=10):
    """Embed *query* and run the hybrid precedent-library search with it.

    Args:
        query: Raw query text; passed both to the embedder and to the search.
        source_kind: Forwarded unchanged as the search's ``source_kind``.
        limit: Forwarded unchanged as the search's ``limit``.

    Returns:
        Whatever ``search_precedent_library_hybrid`` returns. Callers in this
        script treat it as a sequence of dict-like rows.
    """
    vector = await embeddings.embed_query(query)
    search_options = {
        "source_kind": source_kind,
        "limit": limit,
        # Halachot are always included for these checks.
        "include_halachot": True,
    }
    hits = await hybrid_search.search_precedent_library_hybrid(
        query, vector, **search_options
    )
    return hits


async def main():
    """Run the three retrieval checks, print a report, and return an exit code.

    Returns:
        0 when every check passed, 1 when at least one check failed.
    """
    passed, failed = [], []

    def record(ok, msg):
        # File the message under the bucket matching the check's outcome.
        (passed if ok else failed).append(msg)

    # 1) THE BUG: for the bare-name query the case ITSELF (Agasi) must be
    #    within the top 3 and ranked above the case that merely CITES it
    #    (Vinfeld), if that one is returned at all.
    rows = await _search(NAME_QUERY, "internal_committee", limit=10)
    a_rank = _rank_of(rows, AGASI_ID)
    v_rank = _rank_of(rows, VINFELD_ID)
    if not a_rank or a_rank > 3:
        ok = False
    else:
        ok = v_rank is None or a_rank < v_rank
    msg = (f"[name/internal] query='{NAME_QUERY}' -> Agasi rank={a_rank}, "
           f"Vinfeld(citer) rank={v_rank} (top ids: {_ids(rows)[:5]})")
    record(ok, msg)

    # 2) REGRESSION: the substantive query must still return Agasi within
    #    the top 8. The top score is reported for inspection only; it is
    #    not part of the pass condition.
    rows = await _search(SUBSTANTIVE_QUERY, "internal_committee", limit=10)
    rank = _rank_of(rows, AGASI_ID)
    if rows:
        top_score = float(rows[0]["score"])
    else:
        top_score = 0.0
    msg = f"[substantive/internal] Agasi rank={rank}, top_score={top_score:.3f}"
    record(rank is not None and rank <= 8, msg)

    # 3) REGRESSION: the substantive query against "external_upload" must
    #    still return at least 3 rows. Row content is not inspected.
    rows = await _search(SUBSTANTIVE_QUERY, "external_upload", limit=10)
    msg = f"[substantive/external] returned {len(rows)} rows (top ids: {_ids(rows)[:3]})"
    record(len(rows) >= 3, msg)

    # Report: passes first, then failures.
    print("\n=== PASS ===")
    for line in passed:
        print("  ✓", line)
    print("=== FAIL ===")
    for line in failed:
        print("  ✗", line)

    return int(bool(failed))


if __name__ == "__main__":
    # Propagate main()'s 0/1 result as the process exit status.
    raise SystemExit(asyncio.run(main()))