feat: add internal committee decisions corpus (source_kind='internal_committee')
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m31s

Three-layer separation: style learning (style_corpus), appeals-committee decisions
(internal_committee), and court rulings (external_upload).

- SCHEMA_V10: chair_name + district columns on case_law and cases, partial indexes
- create_internal_committee_decision() DB upsert function
- search_precedent_library_semantic() now accepts source_kind/district/chair_name params
- search_precedent_library_hybrid() passes through new params
- services/internal_decisions.py: ingest_internal_decision, migrate_from_style_corpus,
  migrate_from_external_corpus (identifies rows via source_type='appeals_committee')
- search_internal_decisions() MCP tool (server.py + tools/search.py)
- internal_decision_migrate() MCP admin tool
- Web endpoints: POST /api/internal-decisions/upload, POST /api/internal-decisions/migrate,
  GET /api/internal-decisions
- ingest_final_version auto-ingests finalized decisions into internal corpus
- SKILL.md updated: agents now search internal + external in parallel, present separately

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 18:33:39 +00:00
parent 1b14e04373
commit 92a2763b86
8 changed files with 718 additions and 15 deletions

View File

@@ -4395,6 +4395,148 @@ async def precedent_queue_pending(kind: str = "metadata", limit: int = 20):
return {"items": items, "count": len(items)}
from legal_mcp.services import internal_decisions as int_decisions_service # noqa: E402
# Strong references to in-flight ingestion tasks. The event loop keeps only a
# weak reference to a task, so one that nobody else references may be
# garbage-collected before it finishes.
_internal_upload_tasks: set = set()


@app.post("/api/internal-decisions/upload")
async def internal_decisions_upload(
    file: UploadFile = File(...),
    case_number: str = Form(...),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    chair_name: str = Form(""),
    district: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),
    is_binding: bool = Form(True),
    summary: str = Form(""),
):
    """Upload a planning appeals-committee decision to the internal corpus.

    The upload is streamed to a staging file under ``UPLOAD_DIR`` and then
    handed to ``int_decisions_service.ingest_internal_decision`` in a
    background task. Progress is recorded through ``_progress`` under the
    returned task id.

    Args:
        file: The decision document; its extension must be in
            ``ALLOWED_EXTENSIONS``.
        case_number: Required; must not be blank.
        case_name, court, chair_name, district, appeal_subtype, summary:
            Optional metadata, stripped of surrounding whitespace.
        decision_date: Optional; an empty value is passed on as ``None``.
        practice_area: Optional; when given it must be in ``_PRACTICE_AREAS``.
        subject_tags: JSON-encoded list. Invalid JSON or a non-list value is
            treated as an empty list.
        is_binding: Passed through to the ingestion service.

    Returns:
        ``{"task_id": <str>}`` identifying the background ingestion.

    Raises:
        HTTPException: 400 for an invalid ``practice_area``, a blank
            ``case_number`` or an unsupported extension; 413 when the upload
            exceeds ``MAX_FILE_SIZE``.
    """
    if practice_area and practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if not case_number.strip():
        raise HTTPException(400, "case_number חובה")

    # The filename is client-controlled. Keep only its final path component
    # (treating backslashes as separators too) so that something like
    # "../../x.pdf" cannot place the staged file outside UPLOAD_DIR.
    safe_name = Path((file.filename or "").replace("\\", "/")).name
    suffix = Path(safe_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    # Best-effort tag parsing: anything that is not a JSON list becomes [].
    try:
        tags = json.loads(subject_tags) if subject_tags else []
    except json.JSONDecodeError:
        tags = []
    if not isinstance(tags, list):
        tags = []

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"int_{uuid4().hex[:8]}_{safe_name}"
    task_id = str(uuid4())

    async def _run():
        """Ingest the staged file, record the outcome, then delete the file."""
        try:
            await int_decisions_service.ingest_internal_decision(
                case_number=case_number.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                chair_name=chair_name.strip(),
                district=district.strip(),
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                summary=summary.strip(),
                file_path=staged,
            )
            await _progress.set(task_id, {"status": "completed", "percent": 100})
        except Exception as e:
            # Top-level boundary of the background task: log and surface the
            # failure through the progress record instead of losing it.
            logger.exception("internal-decisions upload failed")
            await _progress.set(task_id, {"status": "failed", "error": str(e)})
        finally:
            staged.unlink(missing_ok=True)

    # Until the background task owns the staged file, any failure (oversized
    # upload, client disconnect, progress-store error) must remove it here.
    handed_off = False
    try:
        size = 0
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)

        await _progress.set(task_id, {
            "status": "queued", "filename": file.filename or "",
            "stage": "queued", "percent": 0,
        })

        task = asyncio.create_task(_run())
        _internal_upload_tasks.add(task)
        task.add_done_callback(_internal_upload_tasks.discard)
        handed_off = True
    finally:
        if not handed_off:
            staged.unlink(missing_ok=True)

    return {"task_id": task_id}
@app.post("/api/internal-decisions/migrate")
async def internal_decisions_migrate(
    source: str = "both",
    dry_run: bool = True,
):
    """Migrate existing rows into the internal committee corpus.

    Args:
        source: Which corpus to migrate from: ``'style_corpus'``,
            ``'external_corpus'`` or ``'both'``.
        dry_run: Forwarded to the migration service; when true, only report
            what would be done (no writes).

    Returns:
        A dict keyed by the corpus name(s) that were processed, each holding
        the value returned by the corresponding service migration call.

    Raises:
        HTTPException: 400 when ``source`` is not one of the accepted values.
    """
    corpus_names = ("style_corpus", "external_corpus")
    if source != "both" and source not in corpus_names:
        raise HTTPException(400, "source חייב להיות style_corpus / external_corpus / both")

    # Listed in execution order: style corpus first, then external corpus.
    migrators = {
        "style_corpus": int_decisions_service.migrate_from_style_corpus,
        "external_corpus": int_decisions_service.migrate_from_external_corpus,
    }

    results: dict = {}
    for corpus in corpus_names:
        if source == "both" or source == corpus:
            results[corpus] = await migrators[corpus](dry_run=dry_run)
    return results
@app.get("/api/internal-decisions")
async def internal_decisions_list(
    district: str = "",
    chair_name: str = "",
    practice_area: str = "",
    limit: int = 100,
):
    """List internal committee decisions with optional filters.

    Args:
        district: When non-empty, keep only rows with this exact district.
        chair_name: When non-empty, keep only rows with this exact chair name.
        practice_area: When non-empty, keep only rows with this exact
            practice area.
        limit: Maximum number of rows to return; must not be negative.

    Returns:
        ``{"total": <int>, "items": [<row dict>, ...]}``. ``items`` is ordered
        by ``date`` descending with NULL dates last. ``total`` counts every
        ``internal_committee`` row and is NOT narrowed by the filters.

    Raises:
        HTTPException: 400 when ``limit`` is negative.
    """
    # A negative LIMIT is rejected by the database and would otherwise
    # surface as an opaque server error.
    if limit < 0:
        raise HTTPException(400, "limit חייב להיות 0 או יותר")

    conditions = ["source_kind = 'internal_committee'"]
    params: list = []
    # Column names are hard-coded here; only the values come from the request
    # and they are always bound as positional parameters ($1, $2, ...).
    for column, value in (
        ("district", district),
        ("chair_name", chair_name),
        ("practice_area", practice_area),
    ):
        if value:
            params.append(value)
            conditions.append(f"{column} = ${len(params)}")

    params.append(limit)
    where = " AND ".join(conditions)
    query = (
        "SELECT id, case_number, case_name, court, district, chair_name, "
        "date, practice_area, appeal_subtype, extraction_status, halacha_extraction_status "
        f"FROM case_law WHERE {where} ORDER BY date DESC NULLS LAST LIMIT ${len(params)}"
    )

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
        # Size of the whole internal corpus, independent of the filters above.
        total = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'internal_committee'"
        )

    return {
        "total": total,
        "items": [dict(r) for r in rows],
    }
@app.get("/api/halachot")
async def halachot_list(
case_law_id: str = "",