Add CMPA (betterment levy) training support and update methodology
Support ingestion of betterment levy (היטל השבחה) decisions into a separate training corpus (CMPA).

Key changes:
- Add .doc file extraction via LibreOffice conversion in extractor
- Add practice_area/appeal_subtype columns to style_corpus table
- Route training files to cmp/ or cmpa/ subdirs based on appeal subtype
- Fix derive_subtype to handle ARAR-YY-NNNN format (was matching year digit)
- Expose practice_area/appeal_subtype params in MCP upload_training tool
- Add appeal_subtype filter to analyze_style for per-type style analysis
- Update betterment levy methodology in lessons.py: checklist (from generic to corpus-based), opening/closing strategies, and discussion rules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -139,9 +139,16 @@ async def document_upload_training(
|
||||
appeal_subtype = pa.derive_subtype(decision_number, practice_area)
|
||||
pa.validate(practice_area, appeal_subtype)
|
||||
|
||||
# Copy to training directory (skip if already there)
|
||||
config.TRAINING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
dest = config.TRAINING_DIR / source.name
|
||||
# Copy to training directory, organized by subtype
|
||||
_SUBTYPE_DIRS = {
|
||||
"betterment_levy": "cmpa",
|
||||
"compensation_197": "cmpa",
|
||||
"building_permit": "cmp",
|
||||
}
|
||||
subdir = _SUBTYPE_DIRS.get(appeal_subtype, "")
|
||||
training_dest = config.TRAINING_DIR / subdir if subdir else config.TRAINING_DIR
|
||||
training_dest.mkdir(parents=True, exist_ok=True)
|
||||
dest = training_dest / source.name
|
||||
if source.resolve() != dest.resolve():
|
||||
shutil.copy2(str(source), str(dest))
|
||||
|
||||
@@ -174,11 +181,12 @@ async def document_upload_training(
|
||||
title=f"[קורפוס] {title}",
|
||||
file_path=str(dest),
|
||||
page_count=page_count,
|
||||
practice_area=practice_area,
|
||||
appeal_subtype=appeal_subtype,
|
||||
)
|
||||
doc_id = UUID(doc["id"])
|
||||
await db.update_document(doc_id, extracted_text=text, extraction_status="completed")
|
||||
await db.update_document(
|
||||
doc_id, extracted_text=text, extraction_status="completed",
|
||||
metadata={"practice_area": practice_area, "appeal_subtype": appeal_subtype},
|
||||
)
|
||||
|
||||
# Generate embeddings and store chunks
|
||||
texts = [c.content for c in chunks]
|
||||
@@ -193,10 +201,7 @@ async def document_upload_training(
|
||||
}
|
||||
for c, emb in zip(chunks, embs)
|
||||
]
|
||||
await db.store_chunks(
|
||||
doc_id, None, chunk_dicts,
|
||||
practice_area=practice_area, appeal_subtype=appeal_subtype,
|
||||
)
|
||||
await db.store_chunks(doc_id, None, chunk_dicts)
|
||||
|
||||
return json.dumps({
|
||||
"corpus_id": str(corpus_id),
|
||||
|
||||
@@ -454,11 +454,16 @@ async def save_block_content(case_number: str, block_id: str, content: str) -> s
|
||||
return str(e)
|
||||
|
||||
|
||||
async def analyze_style() -> str:
|
||||
"""הרצת ניתוח סגנון על קורפוס ההחלטות של דפנה. מחלץ דפוסי כתיבה ושומר אותם."""
|
||||
async def analyze_style(appeal_subtype: str = "") -> str:
|
||||
"""הרצת ניתוח סגנון על קורפוס ההחלטות של דפנה. מחלץ דפוסי כתיבה ושומר אותם.
|
||||
|
||||
Args:
|
||||
appeal_subtype: סינון לפי סוג ערר (building_permit / betterment_levy / compensation_197).
|
||||
ריק = כל ההחלטות.
|
||||
"""
|
||||
from legal_mcp.services.style_analyzer import analyze_corpus
|
||||
|
||||
result = await analyze_corpus()
|
||||
result = await analyze_corpus(appeal_subtype)
|
||||
return json.dumps(result, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user