From 96ea54dc6e0ac8682c1792abdabebc1969dce296 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 4 Apr 2026 15:35:16 +0000 Subject: [PATCH] Add claim_type field: distinguish claims vs responses vs replies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Legal documents have 3 types of assertions: - claim: from appeal documents (כתב ערר) - response: from original responses (כתב תשובה) - reply: from supplementary responses (תגובה, השלמת טיעון) DB: added claim_type column to claims table Extractor: _infer_claim_type() auto-detects from doc_type + title Updated existing 113 records: 29 claims, 28 responses, 56 replies Co-Authored-By: Claude Opus 4.6 (1M context) --- .../legal_mcp/services/claims_extractor.py | 24 +++++++++++++++++++ mcp-server/src/legal_mcp/services/db.py | 5 ++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/claims_extractor.py b/mcp-server/src/legal_mcp/services/claims_extractor.py index 694f740..a31a0d8 100644 --- a/mcp-server/src/legal_mcp/services/claims_extractor.py +++ b/mcp-server/src/legal_mcp/services/claims_extractor.py @@ -114,6 +114,25 @@ async def extract_claims_with_ai( return [c for c in claims if "party_role" in c and "claim_text" in c] +def _infer_claim_type(doc_type: str, source_name: str) -> str: + """Determine claim_type from document type and title. + + - 'claim' = from appeal documents (כתב ערר) + - 'response' = from original response documents (כתב תשובה) + - 'reply' = from supplementary responses (תגובה, השלמת טיעון) + """ + name_lower = source_name.lower() if source_name else "" + if doc_type == "appeal" or "כתב ערר" in name_lower: + return "claim" + if "כתב תשובה" in name_lower: + return "response" + if any(kw in name_lower for kw in ["תגובת", "השלמת טיעון", "תגובה"]): + return "reply" + if doc_type == "response": + return "response" + return "claim" + + # ── Regex-based extraction (from existing decisions) ────────────── PARTY_PATTERNS = [ @@ -229,6 +248,11 @@ async def extract_and_store_claims( if not claims: return {"status": "no_claims", "total": 0, "source": source_name} + # Determine claim_type from document type and title + claim_type = _infer_claim_type(doc_type, source_name) + for c in claims: + c["claim_type"] = claim_type + stored = await db.store_claims(case_id, claims, source_document=source_name) # Summarize by role diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 8bf18ee..5438f1a 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -572,14 +572,15 @@ async def store_claims(case_id: UUID, claims: list[dict], source_document: str = ) for claim in claims: await conn.execute( - """INSERT INTO claims (case_id, party_role, party_name, claim_text, claim_index, source_document) - VALUES ($1, $2, $3, $4, $5, $6)""", + """INSERT INTO claims (case_id, party_role, party_name, claim_text, claim_index, source_document, claim_type) + VALUES ($1, $2, $3, $4, $5, $6, $7)""", case_id, claim["party_role"], claim.get("party_name", ""), claim["claim_text"], claim.get("claim_index", 0), source_document, + claim.get("claim_type", "claim"), ) return len(claims)