Files
legal-ai/data/eval/baseline.json
Chaim 4debe9995b chore(#15): adopt MULTIMODAL_TEXT_WEIGHT=0.65 + close #15, open #80
A/B eval (eval_retrieval.py, 86-query gold-set) showed the 0.5 default was
mis-tuned: the image side was weighted too heavily and dragged
precedent_library recall down from 0.971 to 0.885. A sweep over 0.5..0.75
showed that at 0.65 multimodal beats text-only on every overall metric AND
every corpus (R@5 0.994 vs 0.989, nDCG@5 0.960 vs 0.944, MRR 0.954 vs 0.936).
Dafna approved.

- MULTIMODAL_TEXT_WEIGHT=0.65 set in Coolify (legal-ai, runtime) + redeploy.
- baseline.json updated to the 0.65 config (future regression reference).
- #15 done (premise was stale — multimodal was already the default on 110
  docs; the win came from tuning the weight, not from the backfill).
- #80 opened: the costly 140-doc legacy backfill is deferred until a targeted
  image-answer gold-set proves the table/image value proposition (untested
  here).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 08:45:06 +00:00

70 lines
1.4 KiB
JSON

{
  "gold_size": 86,
  "retrieval_config": {
    "MULTIMODAL_ENABLED": true,
    "VOYAGE_RERANK_ENABLED": false,
    "VOYAGE_MODEL": "voyage-3",
    "MULTIMODAL_TEXT_WEIGHT": 0.65,
    "MULTIMODAL_RRF_K": 60,
    "BM25_HYBRID_ENABLED": true
  },
  "overall": {
    "P@5": 0.2465,
    "R@5": 0.9938,
    "nDCG@5": 0.9597,
    "P@10": 0.1244,
    "R@10": 0.9961,
    "nDCG@10": 0.9611,
    "MRR": 0.9535
  },
  "by_corpus": {
    "internal_decisions": {
      "P@5": 0.2037,
      "R@5": 1.0,
      "nDCG@5": 0.978,
      "P@10": 0.1019,
      "R@10": 1.0,
      "nDCG@10": 0.978,
      "MRR": 0.9722
    },
    "precedent_library": {
      "P@5": 0.3188,
      "R@5": 0.9833,
      "nDCG@5": 0.9288,
      "P@10": 0.1625,
      "R@10": 0.9896,
      "nDCG@10": 0.9326,
      "MRR": 0.9219
    }
  },
  "by_practice_area": {
    "betterment_levy": {
      "P@5": 0.2051,
      "R@5": 1.0,
      "nDCG@5": 0.9621,
      "P@10": 0.1026,
      "R@10": 1.0,
      "nDCG@10": 0.9621,
      "MRR": 0.9487
    },
    "compensation_197": {
      "P@5": 0.2,
      "R@5": 1.0,
      "nDCG@5": 1.0,
      "P@10": 0.1,
      "R@10": 1.0,
      "nDCG@10": 1.0,
      "MRR": 1.0
    },
    "rishuy_uvniya": {
      "P@5": 0.2059,
      "R@5": 1.0,
      "nDCG@5": 0.9976,
      "P@10": 0.1029,
      "R@10": 1.0,
      "nDCG@10": 0.9976,
      "MRR": 1.0
    }
  },
  "generated_at": "20260603T084350Z"
}