Practice area separation: multi-tenant axis across DB, RAG, and UI
Adds two orthogonal columns — practice_area (top-level legal domain: appeals_committee / national_insurance / labor_law) and appeal_subtype (building_permit / betterment_levy / compensation_197) — denormalized into cases, documents, document_chunks, decisions, and style_corpus so vector searches can filter without JOINs.

Why: the system handles two unrelated sub-domains under the same appeals committee (1xxx building permits versus 8xxx/9xxx betterment_levy / compensation_197), with different rules and writing style. Without a separation axis, search_similar() and the block-writer's precedent lookup were free to surface betterment-levy paragraphs while drafting a building-permit decision — a real risk of cross-domain contamination. The same axis also lets future domains (national insurance, labor law) coexist without separate schemas.

Schema (V4 migration in db.py):
- ALTER ... ADD COLUMN IF NOT EXISTS on all five tables + composite indexes (practice_area first).
- Idempotent backfill: case_number ~ '^1' → building_permit, '^8' → betterment_levy, '^9' → compensation_197; propagated to documents, chunks, and decisions via case_id; training-corpus rows (case_id NULL) default to appeals_committee.

Code:
- New services/practice_area.py with derive_subtype, validate, and is_override + enum constants.
- db.create_case / create_document / store_chunks / create_decision inherit practice_area from the parent case (or take an explicit override for the case_id=None training corpus).
- db.search_similar and search_similar_paragraphs accept practice_area + appeal_subtype filters using the denormalized columns.
- tools/search.py auto-resolves the filter from case_number when given.
- block_writer._build_precedents_context now passes the active case's practice_area to search_similar_paragraphs — closes the contamination hole for the discussion-block precedent fetch.
- tools/cases.case_create auto-derives subtype from case_number; an explicit override that disagrees writes a case_subtype_override entry to audit_log so we can spot bad classifications later.
- tools/documents.document_upload_training tags new training material with practice_area + subtype end-to-end (corpus, document, chunks).

UI (web/static/index.html + web/app.py):
- New-case wizard gets a practice_area dropdown (the other options stay disabled until national_insurance / labor_law are supported) and an appeal_subtype dropdown with JS auto-fill from the case-number prefix; manual edits stick.
- Case header shows a blue badge with practice_area · subtype.
- CaseCreateRequest plumbs both fields through to cases_tools.case_create.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,7 @@ from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import db
|
||||
from legal_mcp.services import audit, db, practice_area as pa
|
||||
|
||||
|
||||
async def case_create(
|
||||
@@ -23,6 +23,8 @@ async def case_create(
|
||||
hearing_date: str = "",
|
||||
notes: str = "",
|
||||
expected_outcome: str = "",
|
||||
practice_area: str = "appeals_committee",
|
||||
appeal_subtype: str = "",
|
||||
) -> str:
|
||||
"""יצירת תיק ערר חדש.
|
||||
|
||||
@@ -38,6 +40,9 @@ async def case_create(
|
||||
hearing_date: תאריך דיון (YYYY-MM-DD)
|
||||
notes: הערות
|
||||
expected_outcome: תוצאה צפויה (rejection/partial_acceptance/full_acceptance/betterment_levy)
|
||||
practice_area: תחום משפטי (appeals_committee / national_insurance / labor_law)
|
||||
appeal_subtype: סוג ערר (building_permit / betterment_levy / compensation_197).
|
||||
ריק = יוסק אוטומטית ממספר התיק
|
||||
"""
|
||||
from datetime import date as date_type
|
||||
|
||||
@@ -45,6 +50,12 @@ async def case_create(
|
||||
if hearing_date:
|
||||
h_date = date_type.fromisoformat(hearing_date)
|
||||
|
||||
# Resolve appeal_subtype: explicit override > auto-derive > 'unknown'
|
||||
derived_subtype = pa.derive_subtype(case_number, practice_area)
|
||||
if not appeal_subtype:
|
||||
appeal_subtype = derived_subtype
|
||||
pa.validate(practice_area, appeal_subtype)
|
||||
|
||||
case = await db.create_case(
|
||||
case_number=case_number,
|
||||
title=title,
|
||||
@@ -57,8 +68,24 @@ async def case_create(
|
||||
hearing_date=h_date,
|
||||
notes=notes,
|
||||
expected_outcome=expected_outcome,
|
||||
practice_area=practice_area,
|
||||
appeal_subtype=appeal_subtype,
|
||||
)
|
||||
|
||||
# If the user overrode the case-number convention (e.g. case 8500 marked
|
||||
# as building_permit), record it so we can audit later.
|
||||
if pa.is_override(case_number, practice_area, appeal_subtype):
|
||||
await audit.log_action(
|
||||
action="case_subtype_override",
|
||||
case_id=UUID(case["id"]),
|
||||
details={
|
||||
"case_number": case_number,
|
||||
"derived_subtype": derived_subtype,
|
||||
"chosen_subtype": appeal_subtype,
|
||||
"practice_area": practice_area,
|
||||
},
|
||||
)
|
||||
|
||||
# Initialize git repo for the case
|
||||
case_dir = config.find_case_dir(case_number)
|
||||
case_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -105,6 +105,8 @@ async def document_upload_training(
|
||||
decision_date: str = "",
|
||||
subject_categories: list[str] | None = None,
|
||||
title: str = "",
|
||||
practice_area: str = "appeals_committee",
|
||||
appeal_subtype: str = "",
|
||||
) -> str:
|
||||
"""העלאת החלטה קודמת של דפנה לקורפוס הסגנון (training).
|
||||
|
||||
@@ -114,10 +116,13 @@ async def document_upload_training(
|
||||
decision_date: תאריך ההחלטה (YYYY-MM-DD)
|
||||
subject_categories: קטגוריות - אפשר לבחור כמה (בנייה, שימוש חורג, תכנית, היתר, הקלה, חלוקה, תמ"א 38, היטל השבחה, פיצויים 197)
|
||||
title: שם המסמך
|
||||
practice_area: תחום משפטי (appeals_committee / national_insurance / labor_law)
|
||||
appeal_subtype: סוג ערר (building_permit / betterment_levy / compensation_197).
|
||||
ריק = יוסק אוטומטית ממספר ההחלטה
|
||||
"""
|
||||
from datetime import date as date_type
|
||||
|
||||
from legal_mcp.services import extractor, embeddings, chunker
|
||||
from legal_mcp.services import chunker, embeddings, extractor, practice_area as pa
|
||||
|
||||
source = Path(file_path)
|
||||
if not source.exists():
|
||||
@@ -126,6 +131,11 @@ async def document_upload_training(
|
||||
if not title:
|
||||
title = source.stem
|
||||
|
||||
# Resolve subtype: explicit > derived from decision_number > 'unknown'
|
||||
if not appeal_subtype:
|
||||
appeal_subtype = pa.derive_subtype(decision_number, practice_area)
|
||||
pa.validate(practice_area, appeal_subtype)
|
||||
|
||||
# Copy to training directory (skip if already there)
|
||||
config.TRAINING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
dest = config.TRAINING_DIR / source.name
|
||||
@@ -140,25 +150,29 @@ async def document_upload_training(
|
||||
if decision_date:
|
||||
d_date = date_type.fromisoformat(decision_date)
|
||||
|
||||
# Add to style corpus
|
||||
# Add to style corpus (tagged by domain so block-writer can filter)
|
||||
corpus_id = await db.add_to_style_corpus(
|
||||
document_id=None,
|
||||
decision_number=decision_number,
|
||||
decision_date=d_date,
|
||||
subject_categories=subject_categories or [],
|
||||
full_text=text,
|
||||
practice_area=practice_area,
|
||||
appeal_subtype=appeal_subtype,
|
||||
)
|
||||
|
||||
# Chunk and embed for RAG search over training corpus
|
||||
chunks = chunker.chunk_document(text)
|
||||
if chunks:
|
||||
# Create a document record (no case association)
|
||||
# Create a document record (no case association — tag explicitly)
|
||||
doc = await db.create_document(
|
||||
case_id=None,
|
||||
doc_type="decision",
|
||||
title=f"[קורפוס] {title}",
|
||||
file_path=str(dest),
|
||||
page_count=page_count,
|
||||
practice_area=practice_area,
|
||||
appeal_subtype=appeal_subtype,
|
||||
)
|
||||
doc_id = UUID(doc["id"])
|
||||
await db.update_document(doc_id, extracted_text=text, extraction_status="completed")
|
||||
@@ -176,7 +190,10 @@ async def document_upload_training(
|
||||
}
|
||||
for c, emb in zip(chunks, embs)
|
||||
]
|
||||
await db.store_chunks(doc_id, None, chunk_dicts)
|
||||
await db.store_chunks(
|
||||
doc_id, None, chunk_dicts,
|
||||
practice_area=practice_area, appeal_subtype=appeal_subtype,
|
||||
)
|
||||
|
||||
return json.dumps({
|
||||
"corpus_id": str(corpus_id),
|
||||
|
||||
@@ -3,28 +3,52 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from uuid import UUID
|
||||
|
||||
from legal_mcp.services import db, embeddings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def search_decisions(
|
||||
query: str,
|
||||
limit: int = 10,
|
||||
section_type: str = "",
|
||||
practice_area: str = "",
|
||||
appeal_subtype: str = "",
|
||||
case_number: str = "",
|
||||
) -> str:
|
||||
"""חיפוש סמנטי בהחלטות קודמות ובמסמכים.
|
||||
"""חיפוש סמנטי בהחלטות קודמות ובמסמכים — מסונן לפי תחום משפטי.
|
||||
|
||||
Args:
|
||||
query: שאילתת חיפוש בעברית (לדוגמה: "שימוש חורג למסחר באזור מגורים")
|
||||
query: שאילתת חיפוש בעברית
|
||||
limit: מספר תוצאות מקסימלי
|
||||
section_type: סינון לפי סוג סעיף (facts, legal_analysis, conclusion, ruling, וכו'). ריק = הכל
|
||||
section_type: סינון לפי סוג סעיף (facts, legal_analysis, ...)
|
||||
practice_area: תחום משפטי לסינון (appeals_committee/national_insurance/...)
|
||||
appeal_subtype: סוג ערר לסינון (building_permit/betterment_levy/compensation_197)
|
||||
case_number: אם סופק, ה-practice_area/subtype יוסקו אוטומטית מהתיק
|
||||
"""
|
||||
# Auto-resolve practice_area from case_number if available
|
||||
if case_number and not practice_area:
|
||||
case = await db.get_case_by_number(case_number)
|
||||
if case:
|
||||
practice_area = case.get("practice_area") or ""
|
||||
appeal_subtype = appeal_subtype or (case.get("appeal_subtype") or "")
|
||||
|
||||
if not practice_area:
|
||||
logger.warning(
|
||||
"search_decisions called without practice_area filter — "
|
||||
"results may mix legal domains"
|
||||
)
|
||||
|
||||
query_emb = await embeddings.embed_query(query)
|
||||
results = await db.search_similar(
|
||||
query_embedding=query_emb,
|
||||
limit=limit,
|
||||
section_type=section_type or None,
|
||||
practice_area=practice_area or None,
|
||||
appeal_subtype=appeal_subtype or None,
|
||||
)
|
||||
|
||||
if not results:
|
||||
@@ -61,6 +85,7 @@ async def search_case_documents(
|
||||
return f"תיק {case_number} לא נמצא."
|
||||
|
||||
query_emb = await embeddings.embed_query(query)
|
||||
# Restricted to case_id — practice_area filter would be redundant.
|
||||
results = await db.search_similar(
|
||||
query_embedding=query_emb,
|
||||
limit=limit,
|
||||
@@ -86,17 +111,37 @@ async def search_case_documents(
|
||||
async def find_similar_cases(
|
||||
description: str,
|
||||
limit: int = 5,
|
||||
practice_area: str = "",
|
||||
appeal_subtype: str = "",
|
||||
case_number: str = "",
|
||||
) -> str:
|
||||
"""מציאת תיקים דומים על בסיס תיאור.
|
||||
"""מציאת תיקים דומים על בסיס תיאור — מסונן לפי תחום משפטי.
|
||||
|
||||
Args:
|
||||
description: תיאור התיק או הנושא (לדוגמה: "ערר על סירוב להיתר בנייה לתוספת קומה")
|
||||
description: תיאור התיק או הנושא
|
||||
limit: מספר תוצאות מקסימלי
|
||||
practice_area: תחום משפטי לסינון
|
||||
appeal_subtype: סוג ערר לסינון
|
||||
case_number: אם סופק, ה-practice_area/subtype יוסקו אוטומטית מהתיק
|
||||
"""
|
||||
if case_number and not practice_area:
|
||||
case = await db.get_case_by_number(case_number)
|
||||
if case:
|
||||
practice_area = case.get("practice_area") or ""
|
||||
appeal_subtype = appeal_subtype or (case.get("appeal_subtype") or "")
|
||||
|
||||
if not practice_area:
|
||||
logger.warning(
|
||||
"find_similar_cases called without practice_area filter — "
|
||||
"results may mix legal domains"
|
||||
)
|
||||
|
||||
query_emb = await embeddings.embed_query(description)
|
||||
results = await db.search_similar(
|
||||
query_embedding=query_emb,
|
||||
limit=limit * 3, # Get more to deduplicate by case
|
||||
practice_area=practice_area or None,
|
||||
appeal_subtype=appeal_subtype or None,
|
||||
)
|
||||
|
||||
if not results:
|
||||
|
||||
Reference in New Issue
Block a user