From 7826ff49106c2d7bab136742bdfdcfe69fc795e5 Mon Sep 17 00:00:00 2001
From: Chaim <chaim@marcus-law.co.il>
Date: Sat, 30 May 2026 11:54:52 +0000
Subject: [PATCH] fix(cases): tolerant case_number lookup so agents see case
 documents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported: an agent claimed the case had no documents because document_list
returned empty — but the documents exist. Root cause: get_case_by_number did
an exact `WHERE case_number = $1`, so any formatting variant of the number
silently failed to resolve. Verified on 8137-24 (9 docs): "8137/24",
"ערר 8137-24", leading/trailing space, and 'בל"מ 8126/03/25' all returned
"תיק לא נמצא", which the agent read as "no documents" and went blind.

Add _normalize_case_number (strip leading proceeding-type prefix to the first
digit, trim, unify '/'→'-') and a normalized fallback in the lookup query
(exact match preferred via ORDER BY). One fix covers every case_number-scoped
tool (document_list, extract_references, search_case_documents, get_claims,
drafting, ...). Bogus numbers still correctly resolve to "not found". (#58)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .taskmaster/tasks/tasks.json            | 18 ++++++++++++---
 mcp-server/src/legal_mcp/services/db.py | 30 ++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/.taskmaster/tasks/tasks.json b/.taskmaster/tasks/tasks.json
index ee8eaf3..20dca90 100644
--- a/.taskmaster/tasks/tasks.json
+++ b/.taskmaster/tasks/tasks.json
@@ -1988,13 +1988,25 @@
         "testStrategy": "אחרי re-chunk לתיק לדוגמה: 0 chunks<50 לאותו case_law_id; search_internal_decisions עדיין מחזיר את התיק; ספירת chunks סבירה.",
         "subtasks": [],
         "updatedAt": "2026-05-30T11:19:06.142606+00:00"
+      },
+      {
+        "id": "58",
+        "title": "[Case access] get_case_by_number שביר לפורמט — סוכן 'עיוור' למסמכי תיק",
+        "description": "דווח ע\"י chaim: סוכן כתב שחסרים מסמכי תיק כי document_list החזיר ריק, אך המסמכים קיימים. שורש: get_case_by_number (db.py) עושה 'WHERE case_number=$1' התאמה מדויקת בלבד. אומת — 8137-24 מחזיר 9 מסמכים, אבל 8137/24 / 'ערר 8137-24' / רווחים / zero-pad → 'תיק לא נמצא'. הסוכן מקבל את המספר בפורמט שונה (כותרת issue, לוכסן, תחילית ערר/בל\"מ) → התאמה נכשלת → 'אין מסמכים'. משפיע על כל הכלים מבוססי case_number (document_list, extract_references, search_case_documents, get_claims, draft, וכו'). תיקון: נורמליזציה (strip prefix לתחילת ספרה, trim, '/'→'-') + fallback בשאילתה. תיקון נקודה-אחת מתקן את כל הכלים.",
+        "status": "done",
+        "priority": "high",
+        "dependencies": [],
+        "details": "db.py: get_case_by_number (~שורה לאחר get_case). להוסיף _normalize_case_number + שאילתה עם OR על replace(trim(case_number),'/','-')=norm, ORDER BY exact-first. בדיקה: כל הווריאציות של 8137-24 מחזירות 9 מסמכים.",
+        "testStrategy": "document_list על 7 וריאציות פורמט של תיק קיים → כולן מחזירות את אותם מסמכים; תיק לא-קיים אמיתי עדיין מחזיר 'לא נמצא'.",
+        "subtasks": [],
+        "updatedAt": "2026-05-30T11:54:34.291Z"
       }
     ],
     "metadata": {
       "version": "1.0.0",
-      "lastModified": "2026-05-30T11:19:23.923Z",
-      "taskCount": 57,
-      "completedCount": 52,
+      "lastModified": "2026-05-30T11:54:34.291Z",
+      "taskCount": 58,
+      "completedCount": 53,
       "tags": [
         "legal-ai"
       ]
diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py
index 4c98f1b..a3f22ae 100644
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 import asyncio
 import json
 import logging
+import re
 from datetime import date
 from uuid import UUID, uuid4
 
@@ -1192,11 +1193,38 @@ async def get_active_draft_path(case_id: UUID) -> str | None:
     return row["active_draft_path"] if row else None
 
 
+def _normalize_case_number(s: str) -> str:
+    """Canonicalise a case number for tolerant lookup (see #58).
+
+    Agents receive the number in many shapes — from a Paperclip issue
+    title ("ערר 8137/24"), with a slash instead of a dash, or with
+    surrounding whitespace — while stored values are bare ("8137-24"), so
+    get_case_by_number's exact match silently fails. Strategy: trim, drop
+    everything before the first digit (the proceeding-type prefix), trim
+    again, and unify '/' → '-'. None/"" yields ""; with no digit, nothing
+    is dropped. Zero-padding ("08137-24") is NOT normalised here.
+    """
+    s = (s or "").strip()
+    m = re.search(r"\d", s)
+    if m:
+        s = s[m.start():]
+    return s.strip().replace("/", "-")
+
+
 async def get_case_by_number(case_number: str) -> dict | None:
     pool = await get_pool()
+    norm = _normalize_case_number(case_number)
     async with pool.acquire() as conn:
+        # Exact match first (fast path + preferred); fall back to a
+        # separator/prefix-normalised comparison so common formatting
+        # variants still resolve to the right case. See #58.
         row = await conn.fetchrow(
-            "SELECT * FROM cases WHERE case_number = $1", case_number
+            """SELECT * FROM cases
+               WHERE case_number = $1
+                  OR replace(btrim(case_number), '/', '-') = $2
+               ORDER BY (case_number = $1) DESC, created_at
+               LIMIT 1""",
+            case_number, norm,
         )
     if row is None:
         return None