From b197d2329ca037bdb2da1d446b8f9b410ec10a9d Mon Sep 17 00:00:00 2001 From: Chaim Date: Tue, 26 May 2026 07:49:49 +0000 Subject: [PATCH] fix(corpus): move citation guard to service level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defense in depth — the MCP wrapper guard catches researcher uploads, but the HTTP API (/api/precedent-library/upload) bypasses the wrapper and calls services.precedent_library.ingest_precedent directly. The guard now also lives in the service, so HTTP uploads of ערר/בל"מ citations to the external corpus get rejected at the source. Companion to DB constraint case_law_external_arar_check (applied via psql) — three independent layers now enforce the same invariant. Co-Authored-By: Claude Sonnet 4.6 --- .../src/legal_mcp/services/precedent_library.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py index 8e929e6..96813f5 100644 --- a/mcp-server/src/legal_mcp/services/precedent_library.py +++ b/mcp-server/src/legal_mcp/services/precedent_library.py @@ -116,6 +116,18 @@ async def ingest_precedent( raise FileNotFoundError(f"file not found: {src}") if not citation.strip(): raise ValueError("citation is required") + # Citation guard at service level (catches both MCP and HTTP API paths). + # Appeals-committee decisions must go through ingest_internal_decision + # which records chair_name+district. The MCP wrapper has the same guard + # for an earlier, friendlier error message — but this is the source of + # truth. See TaskMaster #30(ב) and DB constraint case_law_external_arar_check. + _norm = citation.strip() + if _norm.startswith(("ערר ", "ערר(", "בל\"מ ", "בל\"מ(", "ARAR ")): + raise ValueError( + "ציטוט שמתחיל ב-'ערר' או 'בל\"מ' הוא החלטת ועדת ערר. " + "השתמש ב-internal_decision_upload (דורש chair_name + district), " + "לא ב-precedent_library_upload." + ) if practice_area not in _VALID_PRACTICE_AREAS: raise ValueError(f"invalid practice_area: {practice_area!r}") if source_type not in _VALID_SOURCE_TYPES: