From 92a2763b86f311f93eb12ed41bf7e6eee4684f29 Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 4 May 2026 18:33:39 +0000 Subject: [PATCH] feat: add internal committee decisions corpus (source_kind='internal_committee') Three-layer separation: style learning (style_corpus), appeals-committee decisions (internal_committee), and court rulings (external_upload). - SCHEMA_V10: chair_name + district columns on case_law and cases, partial indexes - create_internal_committee_decision() DB upsert function - search_precedent_library_semantic() now accepts source_kind/district/chair_name params - search_precedent_library_hybrid() passes through new params - services/internal_decisions.py: ingest_internal_decision, migrate_from_style_corpus, migrate_from_external_corpus (identifies rows via source_type='appeals_committee') - search_internal_decisions() MCP tool (server.py + tools/search.py) - internal_decision_migrate() MCP admin tool - Web endpoints: POST /api/internal-decisions/upload, POST /api/internal-decisions/migrate, GET /api/internal-decisions - ingest_final_version auto-ingests finalized decisions into internal corpus - SKILL.md updated: agents now search internal + external in parallel, present separately Co-Authored-By: Claude Opus 4.7 (1M context) --- mcp-server/src/legal_mcp/server.py | 51 +++ mcp-server/src/legal_mcp/services/db.py | 120 ++++++- .../src/legal_mcp/services/hybrid_search.py | 12 +- .../legal_mcp/services/internal_decisions.py | 301 ++++++++++++++++++ mcp-server/src/legal_mcp/tools/search.py | 60 ++++ mcp-server/src/legal_mcp/tools/workflow.py | 24 +- skills/decision/SKILL.md | 23 +- web/app.py | 142 +++++++++ 8 files changed, 718 insertions(+), 15 deletions(-) create mode 100644 mcp-server/src/legal_mcp/services/internal_decisions.py diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index 998bcc4..626c659 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -390,6 +390,35 @@ 
# NOTE(review): the docstrings of the two tools below are reproduced verbatim
# (Hebrew). They are presumably exposed to MCP clients as the tool description,
# which makes them runtime text rather than developer documentation.
@mcp.tool()
async def search_internal_decisions(
    query: str,
    practice_area: str = "",
    appeal_subtype: str = "",
    district: str = "",
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> str:
    """חיפוש בהחלטות ועדות ערר לתכנון ובנייה (כל המחוזות).

    מחזיר החלטות מהקורפוס הפנימי של ועדות הערר — נפרד מפסיקת בתי המשפט.
    השתמש בו במקביל ל-search_precedent_library להצגת שתי שכבות נפרדות.

    Args:
        query: שאילתת חיפוש בעברית
        practice_area: rishuy_uvniya / betterment_levy / compensation_197
        appeal_subtype: סינון לפי תת-סוג ערר
        district: מחוז — ירושלים / מרכז / תל אביב / צפון / דרום / ארצי. ריק = כל המחוזות
        chair_name: שם יו"ר הוועדה לסינון. ריק = כל היו"רים
        limit: מספר תוצאות מקסימלי
        include_halachot: האם לכלול הלכות שחולצו
    """
    # Thin MCP wrapper: the search and the JSON formatting live in tools/search.py.
    return await search.search_internal_decisions(
        query=query,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        district=district,
        chair_name=chair_name,
        limit=limit,
        include_halachot=include_halachot,
    )


@mcp.tool()
async def internal_decision_migrate(
    source: str = "both",
    dry_run: bool = True,
) -> str:
    """העברת החלטות ועדת ערר קיימות לקורפוס הפנימי (פעולת admin).

    source: 'style_corpus' | 'external_corpus' | 'both'
    dry_run: אם true — מציג מה יקרה ללא כתיבה
    """
    import json as _json
    from legal_mcp.services import internal_decisions as int_svc

    # Insertion order is the execution order: style corpus first, then the
    # external corpus.
    migrations = {
        "style_corpus": int_svc.migrate_from_style_corpus,
        "external_corpus": int_svc.migrate_from_external_corpus,
    }
    if source != "both" and source not in migrations:
        return "source חייב להיות style_corpus / external_corpus / both"

    selected = migrations if source == "both" else {source: migrations[source]}
    report: dict = {}
    for corpus, migrate in selected.items():
        report[corpus] = await migrate(dry_run=dry_run)
    return _json.dumps(report, ensure_ascii=False, indent=2)
async def create_internal_committee_decision(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    chair_name: str = "",
    district: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
) -> dict:
    """Upsert an appeals-committee decision as ``source_kind='internal_committee'``.

    The ``case_law`` table is looked up by ``case_number``. When a row already
    exists (for example a cited-only stub) it is updated in place and
    re-labelled ``source_type='appeals_committee'`` /
    ``source_kind='internal_committee'``; otherwise a new row is inserted.
    In both cases ``extraction_status`` is set to ``'processing'`` and
    ``halacha_extraction_status`` to ``'pending'``.

    On update, blank optional metadata (``court``, ``chair_name``,
    ``district``, ``practice_area``, ``appeal_subtype``, ``summary``) and a
    ``None`` ``decision_date`` / ``document_id`` keep the stored value instead
    of erasing it. ``case_name``, ``subject_tags``, ``full_text`` and
    ``is_binding`` are always overwritten.

    Args:
        case_number: Lookup key of the decision.
        case_name: Display name of the decision.
        full_text: Full decision text.
        court: Issuing committee / court name.
        decision_date: Date of the decision, if known.
        chair_name: Name of the committee chair.
        district: Committee district.
        practice_area: Practice-area code.
        appeal_subtype: Appeal sub-type.
        subject_tags: Tags, stored JSON-encoded; ``None`` is stored as ``[]``.
        summary: Short summary.
        is_binding: Whether the decision is flagged as binding.
        document_id: Optional id of the backing document.

    Returns:
        The stored row converted by ``_row_to_case_law``.
    """
    pool = await get_pool()
    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
    async with pool.acquire() as conn:
        # The lookup and the write must be one unit of work; otherwise two
        # concurrent ingests of the same case_number can interleave between
        # the SELECT and the UPDATE/INSERT.
        async with conn.transaction():
            existing = await conn.fetchrow(
                # FOR UPDATE locks the matched row until the transaction ends.
                # NOTE(review): it cannot stop two concurrent INSERTs of a
                # brand-new case_number — that needs a unique constraint on
                # case_law.case_number; confirm whether one exists.
                "SELECT id FROM case_law WHERE case_number = $1 FOR UPDATE",
                case_number,
            )
            if existing:
                row = await conn.fetchrow(
                    """
                    UPDATE case_law SET
                        case_name = $2,
                        court = COALESCE(NULLIF($3, ''), court),
                        date = COALESCE($4, date),
                        chair_name = COALESCE(NULLIF($5, ''), chair_name),
                        district = COALESCE(NULLIF($6, ''), district),
                        practice_area = COALESCE(NULLIF($7, ''), practice_area),
                        appeal_subtype = COALESCE(NULLIF($8, ''), appeal_subtype),
                        subject_tags = $9,
                        summary = COALESCE(NULLIF($10, ''), summary),
                        full_text = $11,
                        source_type = 'appeals_committee',
                        source_kind = 'internal_committee',
                        is_binding = $12,
                        document_id = COALESCE($13, document_id),
                        extraction_status = 'processing',
                        halacha_extraction_status = 'pending'
                    WHERE id = $1
                    RETURNING *
                    """,
                    existing["id"], case_name, court, decision_date,
                    chair_name, district, practice_area, appeal_subtype,
                    tags_json, summary, full_text, is_binding, document_id,
                )
            else:
                row = await conn.fetchrow(
                    """
                    INSERT INTO case_law (
                        case_number, case_name, court, date, chair_name, district,
                        subject_tags, summary, full_text,
                        source_kind, source_type, document_id,
                        extraction_status, halacha_extraction_status,
                        practice_area, appeal_subtype, is_binding
                    ) VALUES (
                        $1, $2, $3, $4, $5, $6,
                        $7, $8, $9,
                        'internal_committee', 'appeals_committee', $10,
                        'processing', 'pending',
                        $11, $12, $13
                    )
                    RETURNING *
                    """,
                    case_number, case_name, court, decision_date, chair_name, district,
                    tags_json, summary, full_text,
                    document_id, practice_area, appeal_subtype, is_binding,
                )
    return _row_to_case_law(row)
Only ``approved`` / ``published`` halachot @@ -2204,7 +2300,7 @@ async def search_precedent_library_semantic( """ pool = await get_pool() halacha_filters = ["h.review_status IN ('approved', 'published')"] - chunk_filters = ["cl.source_kind = 'external_upload'"] + chunk_filters = [f"cl.source_kind = '{source_kind}'"] h_params: list = [query_embedding, limit] c_params: list = [query_embedding, limit] h_idx = 3 @@ -2249,13 +2345,27 @@ async def search_precedent_library_semantic( halacha_filters.append(f"${h_idx} = ANY(h.subject_tags)") h_params.append(subject_tag) h_idx += 1 + if district: + halacha_filters.append(f"cl.district = ${h_idx}") + h_params.append(district) + h_idx += 1 + chunk_filters.append(f"cl.district = ${c_idx}") + c_params.append(district) + c_idx += 1 + if chair_name: + halacha_filters.append(f"cl.chair_name = ${h_idx}") + h_params.append(chair_name) + h_idx += 1 + chunk_filters.append(f"cl.chair_name = ${c_idx}") + c_params.append(chair_name) + c_idx += 1 halacha_sql = f""" SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement, h.reasoning_summary, h.supporting_quote, h.page_reference, h.practice_areas, h.subject_tags, h.confidence, h.rule_type, cl.case_number, cl.case_name, cl.court, cl.date AS decision_date, - cl.precedent_level, + cl.precedent_level, cl.chair_name, cl.district, 1 - (h.embedding <=> $1) AS score FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id @@ -2269,7 +2379,7 @@ async def search_precedent_library_semantic( SELECT pc.id AS chunk_id, pc.case_law_id, pc.content, pc.section_type, pc.page_number, cl.case_number, cl.case_name, cl.court, cl.date AS decision_date, - cl.precedent_level, cl.practice_area, + cl.precedent_level, cl.practice_area, cl.chair_name, cl.district, 1 - (pc.embedding <=> $1) AS score FROM precedent_chunks pc JOIN case_law cl ON cl.id = pc.case_law_id diff --git a/mcp-server/src/legal_mcp/services/hybrid_search.py b/mcp-server/src/legal_mcp/services/hybrid_search.py index 494e66e..4a11ede 100644 --- 
a/mcp-server/src/legal_mcp/services/hybrid_search.py +++ b/mcp-server/src/legal_mcp/services/hybrid_search.py @@ -88,8 +88,15 @@ async def search_precedent_library_hybrid( is_binding: bool | None = None, subject_tag: str = "", include_halachot: bool = True, + source_kind: str = "external_upload", + district: str = "", + chair_name: str = "", ) -> list[dict]: - """Hybrid wrapper for precedent-library search.""" + """Hybrid wrapper for precedent-library search. + + source_kind='external_upload' → court rulings (default) + source_kind='internal_committee' → appeals-committee decisions + """ fetch_k = max(limit, config.VOYAGE_RERANK_FETCH_K) if config.MULTIMODAL_ENABLED else limit async def _base(limit: int) -> list[dict]: @@ -103,6 +110,9 @@ async def search_precedent_library_hybrid( subject_tag=subject_tag, limit=limit, include_halachot=include_halachot, + source_kind=source_kind, + district=district, + chair_name=chair_name, ) text_results = await rerank.maybe_rerank( diff --git a/mcp-server/src/legal_mcp/services/internal_decisions.py b/mcp-server/src/legal_mcp/services/internal_decisions.py new file mode 100644 index 0000000..98b4032 --- /dev/null +++ b/mcp-server/src/legal_mcp/services/internal_decisions.py @@ -0,0 +1,301 @@ +"""Orchestrator for the Internal Committee Decisions corpus. + +Ingest pipeline: + text/file → INSERT case_law (source_kind='internal_committee') + → chunk → embed → store precedent_chunks + → queue halacha extraction + +Migration helpers: + migrate_from_style_corpus() — re-index style_corpus entries as searchable + migrate_from_external_corpus() — reclassify external appeals-committee rows + +All ועדות ערר (any district) belong here. +Judicial decisions (Supreme Court, Administrative Court) stay in external_upload. 
# District labels accepted by the internal corpus ("" means "not specified").
_VALID_DISTRICTS = {"", "ירושלים", "מרכז", "תל אביב", "צפון", "דרום", "ארצי"}

# (keyword, district) pairs scanned in order against a court/committee name;
# the first keyword found wins. "ארצית" can never be reached on its own because
# "ארצי" is a substring of it, but both map to the same district.
_COURT_TO_DISTRICT = [
    ("ירושלים", "ירושלים"),
    ("תל אביב", "תל אביב"),
    ('ת"א', "תל אביב"),
    ("מרכז", "מרכז"),
    ("חיפה", "צפון"),
    ("צפון", "צפון"),
    ("דרום", "דרום"),
    ("ארצי", "ארצי"),
    ("ארצית", "ארצי"),
]


def _coerce_date(value) -> date | None:
    """Convert a loosely-typed date value to a plain ``date``.

    Args:
        value: ``None``, ``""``, a ``date``, a ``datetime`` or an ISO-8601
            string (only the leading ``YYYY-MM-DD`` part is read).

    Returns:
        The corresponding ``date``, or ``None`` when the value is empty, of an
        unsupported type, or not parseable.
    """
    from datetime import datetime

    if value is None or value == "":
        return None
    if isinstance(value, datetime):
        # datetime is a subclass of date; drop the time part so the result
        # really is a plain date, as the return annotation promises.
        return value.date()
    if isinstance(value, date):
        return value
    if isinstance(value, str):
        try:
            # Tolerate surrounding whitespace and a trailing time component.
            return date.fromisoformat(value.strip()[:10])
        except ValueError:
            return None
    return None


def _safe_filename(name: str) -> str:
    """Return a filesystem-safe file name derived from *name*.

    Directory components are dropped and every character that is not a word
    character, ``.``, ``-``, ``+``, a Hebrew letter or a space is replaced by
    ``_``. An empty result falls back to a random ``internal-<hex>`` name.
    """
    base = Path(name).name
    return re.sub(r"[^\w.\-+א-ת ]", "_", base) or f"internal-{uuid4().hex[:8]}"


def _district_from_court(court: str) -> str:
    """Infer the district from a court/committee name.

    Returns the district of the first ``_COURT_TO_DISTRICT`` keyword contained
    in *court*, or ``""`` when *court* is empty/``None`` or nothing matches.
    """
    if not court:
        return ""
    for keyword, district in _COURT_TO_DISTRICT:
        if keyword in court:
            return district
    return ""
async def ingest_internal_decision(
    *,
    case_number: str,
    case_name: str = "",
    court: str = "",
    decision_date=None,
    chair_name: str = "",
    district: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    is_binding: bool = True,
    file_path: str | Path | None = None,
    text: str | None = None,
    document_id: UUID | None = None,
) -> dict:
    """Ingest an appeals-committee decision into the internal corpus.

    Pipeline: obtain the text (from *file_path* or *text*), upsert the
    ``case_law`` row, chunk the text, embed the chunks, store them, then
    request halacha extraction.

    Args:
        case_number: Required, non-blank decision number.
        case_name: Display name; falls back to *case_number* when blank.
        court: Issuing committee name; also used to infer the district.
        decision_date: ``date``, ISO string or ``None``.
        chair_name: Committee chair.
        district: District label; when blank it is inferred from *court*.
        practice_area: Practice-area code (passed through unchanged).
        appeal_subtype: Appeal sub-type.
        subject_tags: Optional list of tags.
        summary: Optional summary.
        is_binding: Binding flag stored on the row.
        file_path: Source file; it is copied under ``INTERNAL_DECISIONS_DIR``
            before text extraction. Takes precedence over *text*.
        text: Raw decision text, used when no file is given.
        document_id: Optional id of the backing document.

    Returns:
        ``{"status": "completed", "case_law_id": "...", "chunks": N}``, plus
        ``"halachot_pending": True`` when at least one chunk was stored.

    Raises:
        ValueError: neither *file_path* nor *text* given, blank *case_number*,
            or no usable text.
        FileNotFoundError: *file_path* does not point to a file.
    """
    if not file_path and not text:
        raise ValueError("either file_path or text is required")
    if not case_number.strip():
        raise ValueError("case_number is required")

    resolved_district = district.strip() or _district_from_court(court)

    if file_path:
        src = Path(file_path)
        if not src.is_file():
            raise FileNotFoundError(f"file not found: {src}")
        # `district` may come straight from a web form. Only a known district
        # label may become a directory name; anything else (including "../..")
        # is filed under "other". The stored metadata keeps the caller's value.
        bucket = resolved_district if resolved_district in _VALID_DISTRICTS else ""
        dest_dir = INTERNAL_DECISIONS_DIR / (bucket or "other")
        dest_dir.mkdir(parents=True, exist_ok=True)
        staged = dest_dir / f"{uuid4().hex[:8]}_{_safe_filename(src.name)}"
        shutil.copy2(src, staged)
        try:
            raw_text, _page_count, page_offsets = await extractor.extract_text(str(staged))
            raw_text = extractor.strip_nevo_preamble(raw_text or "").strip()
            if not raw_text:
                raise ValueError("no extractable text in file")
        except Exception:
            # No case_law row refers to this copy yet; do not leave an orphan.
            staged.unlink(missing_ok=True)
            raise
    else:
        raw_text = (text or "").strip()
        if not raw_text:
            raise ValueError("text is empty")
        page_offsets = None

    record = await db.create_internal_committee_decision(
        case_number=case_number.strip(),
        case_name=(case_name.strip() or case_number.strip()),
        full_text=raw_text,
        court=court.strip(),
        decision_date=_coerce_date(decision_date),
        chair_name=chair_name.strip(),
        district=resolved_district,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype.strip(),
        subject_tags=list(subject_tags or []),
        summary=summary.strip(),
        is_binding=is_binding,
        document_id=document_id,
    )
    case_law_id = UUID(str(record["id"]))

    try:
        chunks = chunker.chunk_document(raw_text, page_offsets=page_offsets)
        if not chunks:
            # Nothing to embed: mark both pipelines as finished.
            await db.set_case_law_extraction_status(case_law_id, "completed")
            await db.set_case_law_halacha_status(case_law_id, "completed")
            return {"status": "completed", "case_law_id": str(case_law_id), "chunks": 0}

        chunk_texts = [c.content for c in chunks]
        chunk_vectors = await embeddings.embed_texts(chunk_texts, input_type="document")
        chunk_dicts = [
            {
                "chunk_index": c.chunk_index,
                "content": c.content,
                "section_type": c.section_type,
                "page_number": c.page_number,
                "embedding": v,
            }
            for c, v in zip(chunks, chunk_vectors)
        ]
        stored = await db.store_precedent_chunks(case_law_id, chunk_dicts)

        await db.set_case_law_extraction_status(case_law_id, "completed")
        await db.set_case_law_halacha_status(case_law_id, "pending")
        await db.request_halacha_extraction(case_law_id)

        return {
            "status": "completed",
            "case_law_id": str(case_law_id),
            "chunks": stored,
            "halachot_pending": True,
        }

    except Exception:
        logger.exception("ingest_internal_decision failed for %s", case_number)
        await db.set_case_law_extraction_status(case_law_id, "failed")
        raise


async def migrate_from_style_corpus(
    dry_run: bool = False,
    *,
    court: str = "ועדת הערר לתכנון ובנייה — מחוז ירושלים",
    chair_name: str = "דפנה תמיר",
    district: str = "ירושלים",
) -> dict:
    """Re-index ``style_corpus`` entries as searchable internal committee decisions.

    ``style_corpus`` rows are only read, never deleted. Entries with a blank
    decision number, or whose number already exists in ``case_law`` as
    ``internal_committee``, are skipped. The skip check also runs in dry-run
    mode, so the dry-run counters match what a real run would do.

    Args:
        dry_run: When true, only count; nothing is written.
        court: Court name recorded on every migrated decision.
        chair_name: Chair name recorded on every migrated decision.
        district: District recorded on every migrated decision.

    Returns:
        ``{"total", "ingested", "skipped", "failed", "dry_run"}`` counters.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT decision_number, decision_date, full_text,
                      practice_area, appeal_subtype, subject_categories
               FROM style_corpus
               ORDER BY decision_date NULLS LAST"""
        )
        # One query instead of one lookup per style_corpus row.
        known_rows = await conn.fetch(
            "SELECT case_number FROM case_law WHERE source_kind = 'internal_committee'"
        )
    already_internal = {r["case_number"] for r in known_rows}

    results = {"total": len(rows), "ingested": 0, "skipped": 0, "failed": 0, "dry_run": dry_run}

    for row in rows:
        case_number = (row["decision_number"] or "").strip()
        if not case_number or case_number in already_internal:
            results["skipped"] += 1
            continue

        if dry_run:
            # Remember it so a duplicate decision_number later in the corpus is
            # reported as skipped, exactly as in a real run.
            already_internal.add(case_number)
            results["ingested"] += 1
            continue

        try:
            subject_tags = list(row["subject_categories"] or [])
            await ingest_internal_decision(
                case_number=case_number,
                court=court,
                decision_date=row["decision_date"],
                chair_name=chair_name,
                district=district,
                practice_area=row["practice_area"] or "",
                appeal_subtype=row["appeal_subtype"] or "",
                subject_tags=subject_tags,
                text=row["full_text"],
            )
            already_internal.add(case_number)
            results["ingested"] += 1
            logger.info("Migrated style_corpus entry: %s", case_number)
        except Exception as e:
            # Best-effort batch: one bad entry must not abort the migration.
            logger.error("Failed to migrate %s: %s", case_number, e)
            results["failed"] += 1

    return results


async def migrate_from_external_corpus(dry_run: bool = False) -> dict:
    """Reclassify external appeals-committee rows as ``internal_committee``.

    Selects ``case_law`` rows with ``source_kind='external_upload'`` and
    ``source_type='appeals_committee'``, switches their ``source_kind`` and
    fills ``district`` when it can be inferred from ``court``. Only
    ``case_law`` is touched; no chunking or embedding is triggered.

    Args:
        dry_run: When true, nothing is written and a ``"preview"`` list of
            ``{"case_number", "court", "district"}`` is returned instead.

    Returns:
        ``{"total", "updated", "dry_run"}`` (plus ``"preview"`` in dry-run).
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT id, case_number, court
               FROM case_law
               WHERE source_kind = 'external_upload'
                 AND source_type = 'appeals_committee'"""
        )

    results = {"total": len(rows), "updated": 0, "dry_run": dry_run}

    if dry_run:
        results["updated"] = len(rows)
        results["preview"] = [
            {"case_number": r["case_number"], "court": r["court"], "district": _district_from_court(r["court"] or "")}
            for r in rows
        ]
        return results

    async with pool.acquire() as conn:
        # All-or-nothing: a failure halfway through must not leave the corpus
        # partly reclassified while the caller sees only an exception.
        async with conn.transaction():
            for row in rows:
                district = _district_from_court(row["court"] or "")
                await conn.execute(
                    """UPDATE case_law
                       SET source_kind = 'internal_committee',
                           district = CASE WHEN $2 <> '' THEN $2 ELSE district END
                       WHERE id = $1""",
                    row["id"], district,
                )
    results["updated"] = len(rows)

    logger.info("Migrated %d external appeals-committee rows to internal_committee", len(rows))
    return results


async def search_internal(
    query: str,
    *,
    practice_area: str = "",
    appeal_subtype: str = "",
    district: str = "",
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> list[dict]:
    """Search the internal committee decisions corpus.

    Embeds *query* and delegates to the hybrid precedent-library search with
    ``source_kind="internal_committee"``.

    Args:
        query: Free-text query; a blank query returns ``[]`` without searching.
        practice_area: Optional practice-area filter.
        appeal_subtype: Optional appeal sub-type filter.
        district: Optional district filter.
        chair_name: Optional chair-name filter.
        limit: Maximum number of results.
        include_halachot: Whether extracted halachot are included.

    Returns:
        The result dicts produced by ``search_precedent_library_hybrid``.
    """
    # Imported lazily, as in the original — presumably to avoid an import cycle.
    from legal_mcp.services import hybrid_search

    if not query.strip():
        return []
    query_vec = await embeddings.embed_query(query)
    return await hybrid_search.search_precedent_library_hybrid(
        query=query,
        query_text_embedding=query_vec,
        limit=limit,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        include_halachot=include_halachot,
        source_kind="internal_committee",
        district=district,
        chair_name=chair_name,
    )
async def search_internal_decisions(
    query: str,
    practice_area: str = "",
    appeal_subtype: str = "",
    district: str = "",
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> str:
    """Search planning appeals-committee decisions (all districts).

    Args:
        query: Search query (Hebrew).
        practice_area: rishuy_uvniya / betterment_levy / compensation_197.
        appeal_subtype: Filter by appeal sub-type.
        district: District filter; empty means all districts.
        chair_name: Committee-chair filter; empty means all chairs.
        limit: Maximum number of results.
        include_halachot: Whether to include extracted halachot.

    Returns:
        A JSON array (as text) with one entry per hit, or a Hebrew
        "nothing found" message when there are no results. Halacha hits carry
        ``rule`` / ``quote`` / ``rule_type``; passage hits carry
        ``content`` / ``section`` / ``page``.
    """
    from legal_mcp.services import internal_decisions as int_svc

    results = await int_svc.search_internal(
        query,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        district=district,
        chair_name=chair_name,
        limit=limit,
        include_halachot=include_halachot,
    )

    if not results:
        return "לא נמצאו החלטות ועדת ערר רלוונטיות."

    formatted = []
    for r in results:
        entry = {
            # A hit without a usable score must not abort the whole response.
            "score": round(float(r.get("score") or 0.0), 4),
            "type": r.get("type", "passage"),
            "case_number": r.get("case_number"),
            "case_name": r.get("case_name"),
            "court": r.get("court"),
            "district": r.get("district"),
            "chair_name": r.get("chair_name"),
            "decision_date": r.get("decision_date"),
        }
        if r.get("type") == "halacha":
            entry["rule"] = r.get("rule_statement")
            entry["quote"] = r.get("supporting_quote")
            entry["rule_type"] = r.get("rule_type")
        else:
            entry["content"] = r.get("content", "")
            entry["section"] = r.get("section_type")
            entry["page"] = r.get("page_number")
        formatted.append(entry)

    # default=str: decision_date is read from a DB date column and may arrive
    # as a date object, which json cannot serialise on its own.
    return json.dumps(formatted, default=str, ensure_ascii=False, indent=2)
+ try: + from legal_mcp.services import internal_decisions as int_svc + await int_svc.ingest_internal_decision( + case_number=case_number, + case_name=case.get("title", ""), + decision_date=case.get("decision_date"), + chair_name=case.get("chair_name", ""), + district="ירושלים", + practice_area=case.get("practice_area", ""), + appeal_subtype=case.get("appeal_subtype", ""), + text=final_text, + ) + result["internal_corpus_ingested"] = True + except Exception as e: + logger.warning("ingest_final_version: internal corpus ingestion failed (non-fatal): %s", e) + result["internal_corpus_ingested"] = False + + return json.dumps(result, default=str, ensure_ascii=False, indent=2) + # ── Chair feedback tools ────────────────────────────────────────── diff --git a/skills/decision/SKILL.md b/skills/decision/SKILL.md index 063d713..95c055d 100644 --- a/skills/decision/SKILL.md +++ b/skills/decision/SKILL.md @@ -293,18 +293,25 @@ description: This skill should be used when writing legal decisions (החלטו ### 7.5 שלושה מקורות פסיקה — אל תבלבל -המערכת מפרידה בין שלושה קורפוסי פסיקה. כל אחד מהם משמש למטרה אחרת ויש כלי MCP נפרד לחיפוש בו: +המערכת מפרידה בין **ארבעה** קורפוסי פסיקה. כל אחד מהם משמש למטרה אחרת ויש כלי MCP נפרד לחיפוש בו: | קורפוס | טבלה | כלי חיפוש | תפקיד | |---|---|---|---| -| תקדימי דפנה (סגנון) | `style_corpus` + `paragraph_embeddings` | `search_decisions` | החלטות שדפנה עצמה כתבה. מקור לסגנון, ניסוחים, ג'וריספרודנציה אישית. | -| ספריית הפסיקה הסמכותית | `case_law` (`source_kind='external_upload'`) + `halachot` | `search_precedent_library` | פסיקה חיצונית מחייבת — עליון, מנהלי, ועדות ערר אחרות — עם הלכות שאושרו ע"י דפנה. **המקור היחיד לציטוטים בבלוק י לפי CREAC.** | -| ציטוטים שצורפו ידנית | `case_precedents` | `precedent_search_library` | quotes שדפנה צירפה לתיק ספציפי בעבר. דומה לקורפוס סמכותי אך פר-תיק, ידני, לא עוברת חילוץ הלכות. | +| לימוד סגנון | `style_corpus` | (לא לחיפוש תוכן) | ממשק /training — ניתוח "הקול" של היו"ר: טון, ביטויי מעבר, מבנה פסקאות. 
**אין לחפש כאן תוכן משפטי.** | +| החלטות ועדות ערר | `case_law` (`source_kind='internal_committee'`) + `halachot` | `search_internal_decisions` | **כל** ועדות הערר לתכנון ובנייה (כל המחוזות). מסונן לפי `district` ו-`chair_name`. מקור לעקביות פנימית ופרקטיקה ארצית. | +| פסיקת בתי משפט | `case_law` (`source_kind='external_upload'`) + `halachot` | `search_precedent_library` | בתי משפט: עליון, מנהלי, בג"ץ. **המקור היחיד לציטוטים מחייבים בבלוק י לפי CREAC.** | +| ציטוטים ידניים | `case_precedents` | `precedent_search_library` | quotes שצורפו לתיק ספציפי בעבר. פר-תיק, ידני. | -**הזרימה הסטנדרטית בבלוק י:** -1. `search_decisions` קודם — בדוק אם דפנה כבר הכריעה בסוגיה דומה (חיסכון דוקטרינרי / הבחנה). -2. `search_precedent_library` — חפש את הכלל המחייב והציטוט התומך לפסקת CREAC. -3. אם הצדדים הפנו לפסיקה שלא בקורפוס — דפנה מעלה אותה דרך `/precedents` ב-UI; חילוץ ההלכות אוטומטי וההלכות מחכות לאישורה. +**הזרימה הסטנדרטית בבלוק י — חפש במקביל:** +1. `search_internal_decisions(district="ירושלים")` — האם ועדת ערר ירושלים הכריעה בסוגיה? (עקביות פנימית) + - אם יש תוצאה רלוונטית: הצג תחת **"החלטות ועדת ערר ירושלים"** והתייחס לה בניתוח. +2. `search_internal_decisions()` (ריק = כל המחוזות) — פרקטיקה ארצית של ועדות אחרות. + - הצג תחת **"החלטות ועדות ערר אחרות"** — כמשל/השוואה, לא כמחייב. +3. `search_precedent_library` — כלל מחייב מבית משפט לפסקת CREAC. + - הצג תחת **"פסיקת בתי משפט"** — זה המקור לציטוט מחייב. +4. אם הצדדים הפנו לפסיקה שלא בקורפוס — דפנה מעלה אותה דרך `/precedents` ב-UI. + +**חשוב:** החלטות ועדת ערר הן פרקטיקה, לא מחייב. ציטוט מחייב בבלוק י מגיע רק מ-`search_precedent_library`. **איסור על המצאת ציטוטים** — ציטוט פסיקה חייב להגיע מאחד מהקורפוסים. אם אין הלכה מאושרת תומכת בנקודה — אל תמציא; ציין שהנושא דורש הוספת פסיקה לקורפוס. 
from legal_mcp.services import internal_decisions as int_decisions_service  # noqa: E402

# Strong references to in-flight background ingests. The event loop keeps only
# weak references to tasks, so a task nobody holds may be garbage-collected
# before it finishes.
_internal_upload_tasks: set = set()


@app.post("/api/internal-decisions/upload")
async def internal_decisions_upload(
    file: UploadFile = File(...),
    case_number: str = Form(...),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    chair_name: str = Form(""),
    district: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),
    is_binding: bool = Form(True),
    summary: str = Form(""),
):
    """Upload a planning appeals-committee decision to the internal corpus.

    The file is streamed to ``UPLOAD_DIR`` (rejected with 413 once it exceeds
    ``MAX_FILE_SIZE``), then ingested in a background task whose progress is
    published under the returned ``task_id``. ``subject_tags`` is a JSON list;
    anything else is treated as no tags.

    Raises:
        HTTPException: 400 for an invalid ``practice_area``, a blank
            ``case_number`` or an unsupported extension; 413 for an oversized
            file.
    """
    if practice_area and practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if not case_number.strip():
        raise HTTPException(400, "case_number חובה")

    # The client controls the filename: keep only its last path component so
    # it cannot point outside UPLOAD_DIR.
    original_name = Path((file.filename or "").replace("\\", "/")).name
    suffix = Path(original_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"int_{uuid4().hex[:8]}_{original_name}"
    size = 0
    fully_written = False
    try:
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)
        fully_written = True
    finally:
        # Covers the size limit, I/O errors and an aborted request alike.
        if not fully_written:
            staged.unlink(missing_ok=True)

    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    task_id = str(uuid4())
    await _progress.set(task_id, {
        "status": "queued", "filename": file.filename or "",
        "stage": "queued", "percent": 0,
    })

    async def _run():
        try:
            await int_decisions_service.ingest_internal_decision(
                case_number=case_number.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                chair_name=chair_name.strip(),
                district=district.strip(),
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                summary=summary.strip(),
                file_path=staged,
            )
            await _progress.set(task_id, {"status": "completed", "percent": 100})
        except Exception as e:
            logger.exception("internal-decisions upload failed")
            await _progress.set(task_id, {"status": "failed", "error": str(e)})
        finally:
            # The ingest service works on its own copy; the upload is temporary.
            staged.unlink(missing_ok=True)

    task = asyncio.create_task(_run())
    _internal_upload_tasks.add(task)
    task.add_done_callback(_internal_upload_tasks.discard)
    return {"task_id": task_id}


@app.post("/api/internal-decisions/migrate")
async def internal_decisions_migrate(
    source: str = "both",
    dry_run: bool = True,
):
    """Migrate existing data to the internal committee corpus.

    source: 'style_corpus' | 'external_corpus' | 'both'
    dry_run: if true, only report what would be done (no writes)
    """
    if source not in {"style_corpus", "external_corpus", "both"}:
        raise HTTPException(400, "source חייב להיות style_corpus / external_corpus / both")

    results: dict = {}
    if source in {"style_corpus", "both"}:
        results["style_corpus"] = await int_decisions_service.migrate_from_style_corpus(dry_run=dry_run)
    if source in {"external_corpus", "both"}:
        results["external_corpus"] = await int_decisions_service.migrate_from_external_corpus(dry_run=dry_run)
    return results


@app.get("/api/internal-decisions")
async def internal_decisions_list(
    district: str = "",
    chair_name: str = "",
    practice_area: str = "",
    limit: int = 100,
):
    """List internal committee decisions with optional filters.

    Returns ``{"total", "items"}``. ``items`` holds at most ``limit`` rows
    matching the filters, newest first. ``total`` is the size of the whole
    internal corpus and is not narrowed by the filters.
    """
    # Column names are fixed identifiers; only the values are user input and
    # they are always bound as query parameters.
    filters = (
        ("district", district),
        ("chair_name", chair_name),
        ("practice_area", practice_area),
    )
    conditions = ["source_kind = 'internal_committee'"]
    params: list = []
    for column, value in filters:
        if value:
            params.append(value)
            conditions.append(f"{column} = ${len(params)}")
    # PostgreSQL rejects a negative LIMIT; treat it as "no rows".
    params.append(max(limit, 0))
    where = " AND ".join(conditions)

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            f"SELECT id, case_number, case_name, court, district, chair_name, "
            f"date, practice_area, appeal_subtype, extraction_status, halacha_extraction_status "
            f"FROM case_law WHERE {where} ORDER BY date DESC NULLS LAST LIMIT ${len(params)}",
            *params,
        )
        total = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'internal_committee'"
        )
    return {
        "total": total,
        "items": [dict(r) for r in rows],
    }