A/B eval (eval_retrieval.py, 86-query gold-set) showed the 0.5 default was mis-tuned: the image side was too heavy and dragged precedent_library recall 0.971 -> 0.885. Sweep 0.5..0.75 — at 0.65 multimodal beats text-only on every overall metric AND every corpus (R@5 0.994 vs 0.989, nDCG@5 0.960 vs 0.944, MRR 0.954 vs 0.936). Dafna approved. - MULTIMODAL_TEXT_WEIGHT=0.65 set in Coolify (legal-ai, runtime) + redeploy. - baseline.json updated to the 0.65 config (future regression reference). - #15 done (premise was stale — multimodal already default on 110 docs; the win was tuning the weight, not the backfill). - #80 opened: the costly 140-doc legacy backfill is deferred until a targeted image-answer gold-set proves the table/image value prop (untested here). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
70 lines
1.4 KiB
JSON
70 lines
1.4 KiB
JSON
{
|
|
"gold_size": 86,
|
|
"retrieval_config": {
|
|
"MULTIMODAL_ENABLED": true,
|
|
"VOYAGE_RERANK_ENABLED": false,
|
|
"VOYAGE_MODEL": "voyage-3",
|
|
"MULTIMODAL_TEXT_WEIGHT": 0.65,
|
|
"MULTIMODAL_RRF_K": 60,
|
|
"BM25_HYBRID_ENABLED": true
|
|
},
|
|
"overall": {
|
|
"P@5": 0.2465,
|
|
"R@5": 0.9938,
|
|
"nDCG@5": 0.9597,
|
|
"P@10": 0.1244,
|
|
"R@10": 0.9961,
|
|
"nDCG@10": 0.9611,
|
|
"MRR": 0.9535
|
|
},
|
|
"by_corpus": {
|
|
"internal_decisions": {
|
|
"P@5": 0.2037,
|
|
"R@5": 1.0,
|
|
"nDCG@5": 0.978,
|
|
"P@10": 0.1019,
|
|
"R@10": 1.0,
|
|
"nDCG@10": 0.978,
|
|
"MRR": 0.9722
|
|
},
|
|
"precedent_library": {
|
|
"P@5": 0.3188,
|
|
"R@5": 0.9833,
|
|
"nDCG@5": 0.9288,
|
|
"P@10": 0.1625,
|
|
"R@10": 0.9896,
|
|
"nDCG@10": 0.9326,
|
|
"MRR": 0.9219
|
|
}
|
|
},
|
|
"by_practice_area": {
|
|
"betterment_levy": {
|
|
"P@5": 0.2051,
|
|
"R@5": 1.0,
|
|
"nDCG@5": 0.9621,
|
|
"P@10": 0.1026,
|
|
"R@10": 1.0,
|
|
"nDCG@10": 0.9621,
|
|
"MRR": 0.9487
|
|
},
|
|
"compensation_197": {
|
|
"P@5": 0.2,
|
|
"R@5": 1.0,
|
|
"nDCG@5": 1.0,
|
|
"P@10": 0.1,
|
|
"R@10": 1.0,
|
|
"nDCG@10": 1.0,
|
|
"MRR": 1.0
|
|
},
|
|
"rishuy_uvniya": {
|
|
"P@5": 0.2059,
|
|
"R@5": 1.0,
|
|
"nDCG@5": 0.9976,
|
|
"P@10": 0.1029,
|
|
"R@10": 1.0,
|
|
"nDCG@10": 0.9976,
|
|
"MRR": 1.0
|
|
}
|
|
},
|
|
"generated_at": "20260603T084350Z"
|
|
} |