From 7826ff49106c2d7bab136742bdfdcfe69fc795e5 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 30 May 2026 11:54:52 +0000 Subject: [PATCH] fix(cases): tolerant case_number lookup so agents see case documents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported: an agent claimed the case had no documents because document_list returned empty — but the documents exist. Root cause: get_case_by_number did an exact `WHERE case_number = $1`, so any formatting variant of the number silently failed to resolve. Verified on 8137-24 (9 docs): "8137/24", "ערר 8137-24", leading/trailing space, and "בל\"מ 8126/03/25" all returned "תיק לא נמצא", which the agent read as "no documents" and went blind. Add _normalize_case_number (strip leading proceeding-type prefix to the first digit, trim, unify '/'→'-') and a normalized fallback in the lookup query (exact match preferred via ORDER BY). One fix covers every case_number-scoped tool (document_list, extract_references, search_case_documents, get_claims, drafting, ...). Bogus numbers still correctly resolve to "not found". (#58) Co-Authored-By: Claude Opus 4.8 (1M context) --- .taskmaster/tasks/tasks.json | 18 ++++++++++++--- mcp-server/src/legal_mcp/services/db.py | 30 ++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/.taskmaster/tasks/tasks.json b/.taskmaster/tasks/tasks.json index ee8eaf3..20dca90 100644 --- a/.taskmaster/tasks/tasks.json +++ b/.taskmaster/tasks/tasks.json @@ -1988,13 +1988,25 @@ "testStrategy": "אחרי re-chunk לתיק לדוגמה: 0 chunks<50 לאותו case_law_id; search_internal_decisions עדיין מחזיר את התיק; ספירת chunks סבירה.", "subtasks": [], "updatedAt": "2026-05-30T11:19:06.142606+00:00" + }, + { + "id": "58", + "title": "[Case access] get_case_by_number שביר לפורמט — סוכן 'עיוור' למסמכי תיק", + "description": "דווח ע\"י chaim: סוכן כתב שחסרים מסמכי תיק כי document_list החזיר ריק, אך המסמכים קיימים. שורש: get_case_by_number (db.py) עושה 'WHERE case_number=$1' התאמה מדויקת בלבד. אומת — 8137-24 מחזיר 9 מסמכים, אבל 8137/24 / 'ערר 8137-24' / רווחים / zero-pad → 'תיק לא נמצא'. הסוכן מקבל את המספר בפורמט שונה (כותרת issue, לוכסן, תחילית ערר/בל\"מ) → התאמה נכשלת → 'אין מסמכים'. משפיע על כל הכלים מבוססי case_number (document_list, extract_references, search_case_documents, get_claims, draft, וכו'). תיקון: נורמליזציה (strip prefix לתחילת ספרה, trim, '/'→'-') + fallback בשאילתה. תיקון נקודה-אחת מתקן את כל הכלים.", + "status": "done", + "priority": "high", + "dependencies": [], + "details": "db.py: get_case_by_number (~שורה לאחר get_case). להוסיף _normalize_case_number + שאילתה עם OR על replace(trim(case_number),'/','-')=norm, ORDER BY exact-first. בדיקה: כל הווריאציות של 8137-24 מחזירות 9 מסמכים.", + "testStrategy": "document_list על 7 וריאציות פורמט של תיק קיים → כולן מחזירות את אותם מסמכים; תיק לא-קיים אמיתי עדיין מחזיר 'לא נמצא'.", + "subtasks": [], + "updatedAt": "2026-05-30T11:54:34.291Z" } ], "metadata": { "version": "1.0.0", - "lastModified": "2026-05-30T11:19:23.923Z", - "taskCount": 57, - "completedCount": 52, + "lastModified": "2026-05-30T11:54:34.291Z", + "taskCount": 58, + "completedCount": 53, "tags": [ "legal-ai" ] diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 4c98f1b..a3f22ae 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -5,6 +5,7 @@ from __future__ import annotations import asyncio import json import logging +import re from datetime import date from uuid import UUID, uuid4 @@ -1192,11 +1193,38 @@ async def get_active_draft_path(case_id: UUID) -> str | None: return row["active_draft_path"] if row else None +def _normalize_case_number(s: str) -> str: + """Canonicalise a case number for tolerant lookup. + + Agents receive the number in many shapes — from a Paperclip issue + title ("ערר 8137/24"), with a slash instead of a dash, padded, or with + surrounding whitespace. Stored values are bare ("8137-24"). Without + this, get_case_by_number's exact match silently fails and the agent + concludes the case has no documents (see #58). Strategy: drop any + leading proceeding-type prefix (everything before the first digit), + trim, and unify '/' → '-'. + """ + s = (s or "").strip() + m = re.search(r"\d", s) + if m: + s = s[m.start():] + return s.strip().replace("/", "-") + + async def get_case_by_number(case_number: str) -> dict | None: pool = await get_pool() + norm = _normalize_case_number(case_number) async with pool.acquire() as conn: + # Exact match first (fast path + preferred); fall back to a + # separator/prefix-normalised comparison so common formatting + # variants still resolve to the right case. See #58. row = await conn.fetchrow( - "SELECT * FROM cases WHERE case_number = $1", case_number + """SELECT * FROM cases + WHERE case_number = $1 + OR replace(btrim(case_number), '/', '-') = $2 + ORDER BY (case_number = $1) DESC, created_at + LIMIT 1""", + case_number, norm, ) if row is None: return None