feat: add internal committee decisions corpus (source_kind='internal_committee')
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m31s

Three-layer separation: style learning (style_corpus), appeals-committee decisions
(internal_committee), and court rulings (external_upload).

- SCHEMA_V10: chair_name + district columns on case_law, chair_name column on cases, index on case_law.source_kind, partial indexes on case_law.chair_name / case_law.district
- create_internal_committee_decision() DB upsert function
- search_precedent_library_semantic() now accepts source_kind/district/chair_name params
- search_precedent_library_hybrid() passes through new params
- services/internal_decisions.py: ingest_internal_decision, migrate_from_style_corpus,
  migrate_from_external_corpus (identifies rows via source_type='appeals_committee')
- search_internal_decisions() MCP tool (server.py + tools/search.py)
- internal_decision_migrate() MCP admin tool
- Web endpoints: POST /api/internal-decisions/upload, POST /api/internal-decisions/migrate,
  GET /api/internal-decisions
- ingest_final_version auto-ingests finalized decisions into internal corpus
- SKILL.md updated: agents now search internal + external in parallel, present separately

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 18:33:39 +00:00
parent 1b14e04373
commit 92a2763b86
8 changed files with 718 additions and 15 deletions

View File

@@ -390,6 +390,35 @@ async def find_similar_cases(
) )
@mcp.tool()
async def search_internal_decisions(
    query: str,
    practice_area: str = "",
    appeal_subtype: str = "",
    district: str = "",
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> str:
    """חיפוש בהחלטות ועדות ערר לתכנון ובנייה (כל המחוזות).

    מחזיר החלטות מהקורפוס הפנימי של ועדות הערר — נפרד מפסיקת בתי המשפט.
    השתמש בו במקביל ל-search_precedent_library להצגת שתי שכבות נפרדות.

    Args:
        query: שאילתת חיפוש בעברית
        practice_area: rishuy_uvniya / betterment_levy / compensation_197
        appeal_subtype: סינון לפי תת-סוג ערר
        district: מחוז — ירושלים / מרכז / תל אביב / צפון / דרום / ארצי. ריק = כל המחוזות
        chair_name: שם יו"ר הוועדה לסינון. ריק = כל היו"רים
        limit: מספר תוצאות מקסימלי
        include_halachot: האם לכלול הלכות שחולצו
    """
    # NOTE: the docstring above is left in Hebrew on purpose — it is presumably
    # published by @mcp.tool() as the tool description, i.e. runtime text.
    #
    # MCP entry point for searching the internal appeals-committee corpus.
    # All work is delegated to tools/search.py; arguments are forwarded by
    # name so the two signatures cannot silently drift out of order.
    return await search.search_internal_decisions(
        query=query,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        district=district,
        chair_name=chair_name,
        limit=limit,
        include_halachot=include_halachot,
    )
# Drafting # Drafting
@mcp.tool() @mcp.tool()
async def get_style_guide() -> str: async def get_style_guide() -> str:
@@ -573,6 +602,28 @@ async def ingest_final_version(
return await workflow.ingest_final_version(case_number, file_path, final_text) return await workflow.ingest_final_version(case_number, file_path, final_text)
@mcp.tool()
async def internal_decision_migrate(
    source: str = "both",
    dry_run: bool = True,
) -> str:
    """העברת החלטות ועדת ערר קיימות לקורפוס הפנימי (פעולת admin).

    source: 'style_corpus' | 'external_corpus' | 'both'
    dry_run: אם true — מציג מה יקרה ללא כתיבה
    """
    # NOTE: the docstring above is left in Hebrew on purpose — it is presumably
    # published by @mcp.tool() as the tool description, i.e. runtime text.
    import json as _json

    from legal_mcp.services import internal_decisions as int_svc

    # Admin tool: runs one or both migrations into the internal corpus and
    # returns their per-source result dicts as a JSON string. The order below
    # (style corpus first, then external corpus) is the execution order.
    migrations = (
        ("style_corpus", int_svc.migrate_from_style_corpus),
        ("external_corpus", int_svc.migrate_from_external_corpus),
    )
    if source != "both" and source not in {name for name, _ in migrations}:
        return "source חייב להיות style_corpus / external_corpus / both"

    results: dict = {}
    for name, migrate in migrations:
        if source in (name, "both"):
            results[name] = await migrate(dry_run=dry_run)
    return _json.dumps(results, ensure_ascii=False, indent=2)
@mcp.tool() @mcp.tool()
async def record_chair_feedback( async def record_chair_feedback(
case_number: str, case_number: str,

View File

@@ -691,6 +691,16 @@ CREATE INDEX IF NOT EXISTS idx_prec_img_emb_case_law
""" """
# Schema v10 — metadata for the internal appeals-committee corpus.
#   * case_law gains chair_name and district (TEXT, default '').
#   * cases gains chair_name only (no district column is added to cases here).
#   * case_law.source_kind gets a plain index; chair_name and district get
#     partial indexes that exclude rows where the value is the empty string.
# Every statement uses IF NOT EXISTS, so re-running the script is harmless.
SCHEMA_V10_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS chair_name TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS district TEXT DEFAULT '';
ALTER TABLE cases ADD COLUMN IF NOT EXISTS chair_name TEXT DEFAULT '';
CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
CREATE INDEX IF NOT EXISTS idx_case_law_chair ON case_law(chair_name) WHERE chair_name <> '';
CREATE INDEX IF NOT EXISTS idx_case_law_district ON case_law(district) WHERE district <> '';
"""
async def _run_schema_migrations(pool: asyncpg.Pool) -> None: async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
async with pool.acquire() as conn: async with pool.acquire() as conn:
await conn.execute(SCHEMA_SQL) await conn.execute(SCHEMA_SQL)
@@ -703,7 +713,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
await conn.execute(SCHEMA_V7_SQL) await conn.execute(SCHEMA_V7_SQL)
await conn.execute(SCHEMA_V8_SQL) await conn.execute(SCHEMA_V8_SQL)
await conn.execute(SCHEMA_V9_SQL) await conn.execute(SCHEMA_V9_SQL)
logger.info("Database schema initialized (v1-v9)") await conn.execute(SCHEMA_V10_SQL)
logger.info("Database schema initialized (v1-v10)")
async def init_schema() -> None: async def init_schema() -> None:
@@ -1817,6 +1828,85 @@ async def create_external_case_law(
return _row_to_case_law(row) return _row_to_case_law(row)
async def create_internal_committee_decision(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    chair_name: str = "",
    district: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
) -> dict:
    """Upsert an appeals-committee decision as ``source_kind='internal_committee'``.

    Lookup is by ``case_number`` only. If any ``case_law`` row with that number
    exists (whatever its current ``source_kind``), it is updated in place and
    re-labelled ``internal_committee`` / ``appeals_committee``; otherwise a new
    row is inserted. Either way ``extraction_status`` is set to ``'processing'``
    and ``halacha_extraction_status`` to ``'pending'``.

    Update semantics for an existing row:
      * ``case_name``, ``practice_area``, ``appeal_subtype``, ``subject_tags``,
        ``full_text`` and ``is_binding`` are always overwritten.
      * ``court``, ``chair_name``, ``district`` and ``summary`` keep the stored
        value when the new value is the empty string; ``date`` and
        ``document_id`` keep the stored value when the new value is ``None``.

    NOTE(review): ``practice_area`` / ``appeal_subtype`` are overwritten even
    when empty, unlike the COALESCE-protected columns — confirm that callers
    passing ``""`` really intend to blank them.

    The lookup and the write run inside one transaction, and the lookup takes
    a row lock (``FOR UPDATE``), so a concurrent writer cannot modify or
    delete the matched row between the existence check and the UPDATE.

    Args:
        case_number: Identifier used to find an existing row.
        case_name: Display name of the decision.
        full_text: Full decision text.
        court: Issuing committee / court label.
        decision_date: Decision date, or ``None`` if unknown.
        chair_name: Name of the committee chair.
        district: District label.
        practice_area: Practice-area code.
        appeal_subtype: Appeal sub-type.
        subject_tags: Tags; stored as a JSON-encoded list (``None`` -> ``[]``).
        summary: Optional summary.
        is_binding: Stored in the ``is_binding`` column.
        document_id: Optional linked document id.

    Returns:
        The stored row converted by ``_row_to_case_law``.
    """
    pool = await get_pool()
    tags_json = json.dumps(subject_tags or [], ensure_ascii=False)
    async with pool.acquire() as conn, conn.transaction():
        existing = await conn.fetchrow(
            "SELECT id FROM case_law WHERE case_number = $1 FOR UPDATE",
            case_number,
        )
        if existing:
            row = await conn.fetchrow(
                """
                UPDATE case_law SET
                    case_name = $2,
                    court = COALESCE(NULLIF($3, ''), court),
                    date = COALESCE($4, date),
                    chair_name = COALESCE(NULLIF($5, ''), chair_name),
                    district = COALESCE(NULLIF($6, ''), district),
                    practice_area = $7,
                    appeal_subtype = $8,
                    subject_tags = $9,
                    summary = COALESCE(NULLIF($10, ''), summary),
                    full_text = $11,
                    source_type = 'appeals_committee',
                    source_kind = 'internal_committee',
                    is_binding = $12,
                    document_id = COALESCE($13, document_id),
                    extraction_status = 'processing',
                    halacha_extraction_status = 'pending'
                WHERE id = $1
                RETURNING *
                """,
                existing["id"], case_name, court, decision_date,
                chair_name, district, practice_area, appeal_subtype,
                tags_json, summary, full_text, is_binding, document_id,
            )
        else:
            row = await conn.fetchrow(
                """
                INSERT INTO case_law (
                    case_number, case_name, court, date, chair_name, district,
                    subject_tags, summary, full_text,
                    source_kind, source_type, document_id,
                    extraction_status, halacha_extraction_status,
                    practice_area, appeal_subtype, is_binding
                ) VALUES (
                    $1, $2, $3, $4, $5, $6,
                    $7, $8, $9,
                    'internal_committee', 'appeals_committee', $10,
                    'processing', 'pending',
                    $11, $12, $13
                )
                RETURNING *
                """,
                case_number, case_name, court, decision_date, chair_name, district,
                tags_json, summary, full_text,
                document_id, practice_area, appeal_subtype, is_binding,
            )
    return _row_to_case_law(row)
async def update_case_law(case_law_id: UUID, **fields) -> dict | None: async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
"""Patch metadata fields on a case_law row. """Patch metadata fields on a case_law row.
@@ -2194,8 +2284,14 @@ async def search_precedent_library_semantic(
subject_tag: str = "", subject_tag: str = "",
limit: int = 10, limit: int = 10,
include_halachot: bool = True, include_halachot: bool = True,
source_kind: str = "external_upload",
district: str = "",
chair_name: str = "",
) -> list[dict]: ) -> list[dict]:
"""Semantic search over chair-uploaded precedents. """Semantic search over precedents filtered by source_kind.
source_kind='external_upload' → court rulings (default)
source_kind='internal_committee' → appeals-committee decisions
Returns merged halachot + chunks. Halachot are pre-distilled rules, so Returns merged halachot + chunks. Halachot are pre-distilled rules, so
they get a small score boost. Only ``approved`` / ``published`` halachot they get a small score boost. Only ``approved`` / ``published`` halachot
@@ -2204,7 +2300,7 @@ async def search_precedent_library_semantic(
""" """
pool = await get_pool() pool = await get_pool()
halacha_filters = ["h.review_status IN ('approved', 'published')"] halacha_filters = ["h.review_status IN ('approved', 'published')"]
chunk_filters = ["cl.source_kind = 'external_upload'"] chunk_filters = [f"cl.source_kind = '{source_kind}'"]
h_params: list = [query_embedding, limit] h_params: list = [query_embedding, limit]
c_params: list = [query_embedding, limit] c_params: list = [query_embedding, limit]
h_idx = 3 h_idx = 3
@@ -2249,13 +2345,27 @@ async def search_precedent_library_semantic(
halacha_filters.append(f"${h_idx} = ANY(h.subject_tags)") halacha_filters.append(f"${h_idx} = ANY(h.subject_tags)")
h_params.append(subject_tag) h_params.append(subject_tag)
h_idx += 1 h_idx += 1
if district:
halacha_filters.append(f"cl.district = ${h_idx}")
h_params.append(district)
h_idx += 1
chunk_filters.append(f"cl.district = ${c_idx}")
c_params.append(district)
c_idx += 1
if chair_name:
halacha_filters.append(f"cl.chair_name = ${h_idx}")
h_params.append(chair_name)
h_idx += 1
chunk_filters.append(f"cl.chair_name = ${c_idx}")
c_params.append(chair_name)
c_idx += 1
halacha_sql = f""" halacha_sql = f"""
SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement, SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
h.reasoning_summary, h.supporting_quote, h.page_reference, h.reasoning_summary, h.supporting_quote, h.page_reference,
h.practice_areas, h.subject_tags, h.confidence, h.rule_type, h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
cl.case_number, cl.case_name, cl.court, cl.date AS decision_date, cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
cl.precedent_level, cl.precedent_level, cl.chair_name, cl.district,
1 - (h.embedding <=> $1) AS score 1 - (h.embedding <=> $1) AS score
FROM halachot h FROM halachot h
JOIN case_law cl ON cl.id = h.case_law_id JOIN case_law cl ON cl.id = h.case_law_id
@@ -2269,7 +2379,7 @@ async def search_precedent_library_semantic(
SELECT pc.id AS chunk_id, pc.case_law_id, pc.content, SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
pc.section_type, pc.page_number, pc.section_type, pc.page_number,
cl.case_number, cl.case_name, cl.court, cl.date AS decision_date, cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
cl.precedent_level, cl.practice_area, cl.precedent_level, cl.practice_area, cl.chair_name, cl.district,
1 - (pc.embedding <=> $1) AS score 1 - (pc.embedding <=> $1) AS score
FROM precedent_chunks pc FROM precedent_chunks pc
JOIN case_law cl ON cl.id = pc.case_law_id JOIN case_law cl ON cl.id = pc.case_law_id

View File

@@ -88,8 +88,15 @@ async def search_precedent_library_hybrid(
is_binding: bool | None = None, is_binding: bool | None = None,
subject_tag: str = "", subject_tag: str = "",
include_halachot: bool = True, include_halachot: bool = True,
source_kind: str = "external_upload",
district: str = "",
chair_name: str = "",
) -> list[dict]: ) -> list[dict]:
"""Hybrid wrapper for precedent-library search.""" """Hybrid wrapper for precedent-library search.
source_kind='external_upload' → court rulings (default)
source_kind='internal_committee' → appeals-committee decisions
"""
fetch_k = max(limit, config.VOYAGE_RERANK_FETCH_K) if config.MULTIMODAL_ENABLED else limit fetch_k = max(limit, config.VOYAGE_RERANK_FETCH_K) if config.MULTIMODAL_ENABLED else limit
async def _base(limit: int) -> list[dict]: async def _base(limit: int) -> list[dict]:
@@ -103,6 +110,9 @@ async def search_precedent_library_hybrid(
subject_tag=subject_tag, subject_tag=subject_tag,
limit=limit, limit=limit,
include_halachot=include_halachot, include_halachot=include_halachot,
source_kind=source_kind,
district=district,
chair_name=chair_name,
) )
text_results = await rerank.maybe_rerank( text_results = await rerank.maybe_rerank(

View File

@@ -0,0 +1,301 @@
"""Orchestrator for the Internal Committee Decisions corpus.
Ingest pipeline:
text/file → INSERT case_law (source_kind='internal_committee')
→ chunk → embed → store precedent_chunks
→ queue halacha extraction
Migration helpers:
migrate_from_style_corpus() — re-index style_corpus entries as searchable
migrate_from_external_corpus() — reclassify external appeals-committee rows
All ועדות ערר (any district) belong here.
Judicial decisions (Supreme Court, Administrative Court) stay in external_upload.
"""
from __future__ import annotations
import logging
import re
import shutil
from datetime import date
from pathlib import Path
from uuid import UUID, uuid4
from legal_mcp import config
from legal_mcp.services import chunker, db, embeddings, extractor
logger = logging.getLogger(__name__)
# Root folder for archived source files of internal decisions;
# ingest_internal_decision() copies each uploaded file into a sub-folder of it.
INTERNAL_DECISIONS_DIR = Path(config.DATA_DIR) / "internal-decisions"
# Recognised district labels. The empty string is included as well
# (presumably meaning "no district specified" — confirm with callers).
_VALID_DISTRICTS = {"", "ירושלים", "מרכז", "תל אביב", "צפון", "דרום", "ארצי"}
# (keyword, district) pairs used by _district_from_court(): the list is scanned
# in order and the first keyword found as a substring of the court name wins,
# so the order of entries is significant.
_COURT_TO_DISTRICT = [
    ("ירושלים", "ירושלים"),
    ("תל אביב", "תל אביב"),
    ('ת"א', "תל אביב"),
    ("מרכז", "מרכז"),
    ("חיפה", "צפון"),
    ("צפון", "צפון"),
    ("דרום", "דרום"),
    ("ארצי", "ארצי"),
    ("ארצית", "ארצי"),
]
def _coerce_date(value) -> date | None:
if value is None or value == "":
return None
if isinstance(value, date):
return value
if isinstance(value, str):
try:
return date.fromisoformat(value[:10])
except ValueError:
return None
return None
def _safe_filename(name: str) -> str:
base = Path(name).name
return re.sub(r"[^\w.\-+א-ת ]", "_", base) or f"internal-{uuid4().hex[:8]}"
def _district_from_court(court: str) -> str:
    """Infer a district label from a free-text court / committee name.

    Scans ``_COURT_TO_DISTRICT`` in order and returns the district of the
    first keyword that occurs as a substring of *court*; returns ``""`` when
    no keyword matches.
    """
    return next(
        (district for keyword, district in _COURT_TO_DISTRICT if keyword in court),
        "",
    )
async def ingest_internal_decision(
    *,
    case_number: str,
    case_name: str = "",
    court: str = "",
    decision_date=None,
    chair_name: str = "",
    district: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    is_binding: bool = True,
    file_path: str | Path | None = None,
    text: str | None = None,
    document_id: UUID | None = None,
) -> dict:
    """Ingest an appeals-committee decision into the internal corpus.

    Pipeline: obtain the text (from ``file_path`` or ``text``) -> upsert the
    ``case_law`` row -> chunk -> embed -> store chunks -> request halacha
    extraction.

    Args:
        case_number: Required, non-blank identifier of the decision.
        case_name: Display name; falls back to ``case_number`` when blank.
        court: Issuing committee label; also used to infer the district.
        decision_date: ``date``, ISO string or ``None`` (see ``_coerce_date``).
        chair_name: Committee chair.
        district: District label; when blank it is inferred from ``court``.
        practice_area: Practice-area code, stored as given.
        appeal_subtype: Appeal sub-type.
        subject_tags: Optional list of tags.
        summary: Optional summary.
        is_binding: Forwarded to the ``case_law`` row.
        file_path: Source file; it is copied under ``INTERNAL_DECISIONS_DIR``
            and text is extracted from the copy. Takes precedence over ``text``.
        text: Raw decision text, used when no file is given.
        document_id: Optional linked document id.

    Returns:
        ``{"status": "completed", "case_law_id": "...", "chunks": N}``, plus
        ``"halachot_pending": True`` when at least one chunk was stored.

    Raises:
        ValueError: neither source given, blank ``case_number``, or no usable
            text.
        FileNotFoundError: ``file_path`` does not point to a file.
        Exception: any chunking / embedding / storage error is re-raised after
            the row's extraction status has been set to ``"failed"``.
    """
    if not file_path and not text:
        raise ValueError("either file_path or text is required")
    if not case_number.strip():
        raise ValueError("case_number is required")

    resolved_district = district.strip() or _district_from_court(court)

    if file_path:
        src = Path(file_path)
        if not src.is_file():
            raise FileNotFoundError(f"file not found: {src}")
        # ``district`` can come straight from a web form, so it must never be
        # used verbatim as a path component (e.g. "../.."). Only recognised
        # labels get their own folder; everything else is filed under "other".
        # The value stored in the database is not affected.
        folder = resolved_district if resolved_district in _VALID_DISTRICTS else ""
        dest_dir = INTERNAL_DECISIONS_DIR / (folder or "other")
        dest_dir.mkdir(parents=True, exist_ok=True)
        staged = dest_dir / f"{uuid4().hex[:8]}_{_safe_filename(src.name)}"
        shutil.copy2(src, staged)
        try:
            raw_text, _page_count, page_offsets = await extractor.extract_text(str(staged))
            raw_text = extractor.strip_nevo_preamble(raw_text or "").strip()
            if not raw_text:
                raise ValueError("no extractable text in file")
        except Exception:
            # No database row exists yet, so the archived copy would be an
            # orphan; remove it before propagating the error.
            staged.unlink(missing_ok=True)
            raise
    else:
        raw_text = (text or "").strip()
        if not raw_text:
            raise ValueError("text is empty")
        page_offsets = None

    record = await db.create_internal_committee_decision(
        case_number=case_number.strip(),
        case_name=(case_name.strip() or case_number.strip()),
        full_text=raw_text,
        court=court.strip(),
        decision_date=_coerce_date(decision_date),
        chair_name=chair_name.strip(),
        district=resolved_district,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype.strip(),
        subject_tags=list(subject_tags or []),
        summary=summary.strip(),
        is_binding=is_binding,
        document_id=document_id,
    )
    case_law_id = UUID(str(record["id"]))

    try:
        chunks = chunker.chunk_document(raw_text, page_offsets=page_offsets)
        if not chunks:
            # Nothing to embed: mark both pipelines as done so the row does
            # not stay in 'processing' / 'pending'.
            await db.set_case_law_extraction_status(case_law_id, "completed")
            await db.set_case_law_halacha_status(case_law_id, "completed")
            return {"status": "completed", "case_law_id": str(case_law_id), "chunks": 0}

        chunk_texts = [c.content for c in chunks]
        chunk_vectors = await embeddings.embed_texts(chunk_texts, input_type="document")
        chunk_dicts = [
            {
                "chunk_index": c.chunk_index,
                "content": c.content,
                "section_type": c.section_type,
                "page_number": c.page_number,
                "embedding": v,
            }
            for c, v in zip(chunks, chunk_vectors)
        ]
        stored = await db.store_precedent_chunks(case_law_id, chunk_dicts)
        await db.set_case_law_extraction_status(case_law_id, "completed")
        await db.set_case_law_halacha_status(case_law_id, "pending")
        await db.request_halacha_extraction(case_law_id)
        return {
            "status": "completed",
            "case_law_id": str(case_law_id),
            "chunks": stored,
            "halachot_pending": True,
        }
    except Exception:
        logger.exception("ingest_internal_decision failed for %s", case_number)
        try:
            await db.set_case_law_extraction_status(case_law_id, "failed")
        except Exception:
            # Do not let a failing status update hide the original error.
            logger.exception("could not mark case_law %s as failed", case_law_id)
        raise
async def migrate_from_style_corpus(dry_run: bool = False) -> dict:
    """Re-index every ``style_corpus`` entry as an internal committee decision.

    ``style_corpus`` rows are only read, never modified or deleted. Rows
    without a decision number, and rows whose decision number already exists
    in ``case_law`` with ``source_kind='internal_committee'``, are counted as
    skipped.

    The "already migrated" lookup is a read-only query, so it also runs in
    dry-run mode; the dry-run counters therefore predict what a real run
    would skip and ingest instead of reporting every row as ingestible.

    Args:
        dry_run: When true nothing is written; ``ingested`` counts the rows
            that a real run would attempt to ingest.

    Returns:
        ``{"total", "ingested", "skipped", "failed", "dry_run"}``.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT decision_number, decision_date, full_text,
                      practice_area, appeal_subtype, subject_categories
               FROM style_corpus
               ORDER BY decision_date NULLS LAST"""
        )

    results = {"total": len(rows), "ingested": 0, "skipped": 0, "failed": 0, "dry_run": dry_run}
    for row in rows:
        case_number = (row["decision_number"] or "").strip()
        if not case_number:
            results["skipped"] += 1
            continue

        already_migrated = await pool.fetchval(
            "SELECT id FROM case_law WHERE case_number = $1 AND source_kind = 'internal_committee'",
            case_number,
        )
        if already_migrated:
            results["skipped"] += 1
            continue

        if dry_run:
            results["ingested"] += 1
            continue

        try:
            # Court, chair and district are fixed values applied to every
            # style_corpus row.
            await ingest_internal_decision(
                case_number=case_number,
                court="ועדת הערר לתכנון ובנייה — מחוז ירושלים",
                decision_date=row["decision_date"],
                chair_name="דפנה תמיר",
                district="ירושלים",
                practice_area=row["practice_area"] or "",
                appeal_subtype=row["appeal_subtype"] or "",
                subject_tags=list(row["subject_categories"] or []),
                text=row["full_text"],
            )
        except Exception:
            # One bad row must not abort the whole migration; keep the
            # traceback in the log so the failure can be diagnosed.
            logger.exception("Failed to migrate %s", case_number)
            results["failed"] += 1
        else:
            results["ingested"] += 1
            logger.info("Migrated style_corpus entry: %s", case_number)
    return results
async def migrate_from_external_corpus(dry_run: bool = False) -> dict:
    """Reclassify external appeals-committee rows as ``internal_committee``.

    Selects ``case_law`` rows with ``source_kind='external_upload'`` and
    ``source_type='appeals_committee'``. For each one it sets
    ``source_kind='internal_committee'`` and, when a district can be inferred
    from the court name, overwrites ``district`` with it (otherwise the stored
    district is kept). Only ``case_law`` is touched.

    All row updates run in a single transaction, so the migration is
    all-or-nothing and the reported ``updated`` count is accurate whenever the
    function returns normally.

    Args:
        dry_run: When true nothing is written and the result contains a
            ``preview`` list with the district that would be assigned per row.

    Returns:
        ``{"total", "updated", "dry_run"}`` plus ``"preview"`` in dry-run mode.
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT id, case_number, court
               FROM case_law
               WHERE source_kind = 'external_upload'
                 AND source_type = 'appeals_committee'"""
        )

    results = {"total": len(rows), "updated": 0, "dry_run": dry_run}
    if dry_run:
        results["updated"] = len(rows)
        results["preview"] = [
            {
                "case_number": r["case_number"],
                "court": r["court"],
                "district": _district_from_court(r["court"] or ""),
            }
            for r in rows
        ]
        return results

    async with pool.acquire() as conn, conn.transaction():
        for row in rows:
            district = _district_from_court(row["court"] or "")
            await conn.execute(
                """UPDATE case_law
                   SET source_kind = 'internal_committee',
                       district = CASE WHEN $2 <> '' THEN $2 ELSE district END
                   WHERE id = $1""",
                row["id"], district,
            )
    results["updated"] = len(rows)
    logger.info("Migrated %d external appeals-committee rows to internal_committee", len(rows))
    return results
async def search_internal(
    query: str,
    *,
    practice_area: str = "",
    appeal_subtype: str = "",
    district: str = "",
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> list[dict]:
    """Search the internal committee-decision corpus.

    Embeds *query* and delegates to the hybrid precedent search with
    ``source_kind`` fixed to ``"internal_committee"``. A blank query returns
    an empty list without computing an embedding.
    """
    # Imported lazily, as in the rest of this module's optional dependencies.
    from legal_mcp.services import hybrid_search

    if not query.strip():
        return []

    filters = {
        "limit": limit,
        "practice_area": practice_area,
        "appeal_subtype": appeal_subtype,
        "include_halachot": include_halachot,
        "source_kind": "internal_committee",
        "district": district,
        "chair_name": chair_name,
    }
    query_vec = await embeddings.embed_query(query)
    return await hybrid_search.search_precedent_library_hybrid(
        query=query,
        query_text_embedding=query_vec,
        **filters,
    )

View File

@@ -179,3 +179,63 @@ async def find_similar_cases(
}) })
return json.dumps(formatted, ensure_ascii=False, indent=2) return json.dumps(formatted, ensure_ascii=False, indent=2)
async def search_internal_decisions(
    query: str,
    practice_area: str = "",
    appeal_subtype: str = "",
    district: str = "",
    chair_name: str = "",
    limit: int = 10,
    include_halachot: bool = True,
) -> str:
    """Search planning appeals-committee decisions (all districts).

    Args:
        query: Search query (Hebrew).
        practice_area: rishuy_uvniya / betterment_levy / compensation_197.
        appeal_subtype: Filter by appeal sub-type.
        district: District filter; empty means all districts.
        chair_name: Committee-chair filter; empty means all chairs.
        limit: Maximum number of results.
        include_halachot: Whether to include extracted halachot.

    Returns:
        A JSON array (as a string) with one entry per hit, or a fixed Hebrew
        "nothing found" message when there are no results. Halacha hits carry
        ``rule`` / ``quote`` / ``rule_type``; passage hits carry ``content`` /
        ``section`` / ``page``.
    """
    from legal_mcp.services import internal_decisions as int_svc

    results = await int_svc.search_internal(
        query,
        practice_area=practice_area,
        appeal_subtype=appeal_subtype,
        district=district,
        chair_name=chair_name,
        limit=limit,
        include_halachot=include_halachot,
    )
    if not results:
        return "לא נמצאו החלטות ועדת ערר רלוונטיות."

    formatted = []
    for r in results:
        entry = {
            "score": round(float(r["score"]), 4),
            "type": r.get("type", "passage"),
            "case_number": r.get("case_number"),
            "case_name": r.get("case_name"),
            "court": r.get("court"),
            "district": r.get("district"),
            "chair_name": r.get("chair_name"),
            "decision_date": r.get("decision_date"),
        }
        if r.get("type") == "halacha":
            entry["rule"] = r.get("rule_statement")
            entry["quote"] = r.get("supporting_quote")
            entry["rule_type"] = r.get("rule_type")
        else:
            entry["content"] = r.get("content", "")
            entry["section"] = r.get("section_type")
            entry["page"] = r.get("page_number")
        formatted.append(entry)

    # decision_date originates from a DATE column and may arrive as a
    # datetime.date, which json cannot encode natively; default=str renders it
    # as an ISO string instead of raising TypeError.
    return json.dumps(formatted, default=str, ensure_ascii=False, indent=2)

View File

@@ -3,10 +3,13 @@
from __future__ import annotations from __future__ import annotations
import json import json
import logging
from uuid import UUID from uuid import UUID
from legal_mcp.services import db from legal_mcp.services import db
logger = logging.getLogger(__name__)
async def workflow_status(case_number: str) -> str: async def workflow_status(case_number: str) -> str:
"""סטטוס תהליך עבודה מלא לתיק - מסמכים, עיבוד, טיוטות. """סטטוס תהליך עבודה מלא לתיק - מסמכים, עיבוד, טיוטות.
@@ -315,10 +318,29 @@ async def ingest_final_version(
try: try:
result = await learning_loop.process_final_version(case_id, final_text) result = await learning_loop.process_final_version(case_id, final_text)
return json.dumps(result, default=str, ensure_ascii=False, indent=2)
except ValueError as e: except ValueError as e:
return json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False, indent=2) return json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False, indent=2)
# Auto-ingest into internal committee decisions corpus (best-effort).
try:
from legal_mcp.services import internal_decisions as int_svc
await int_svc.ingest_internal_decision(
case_number=case_number,
case_name=case.get("title", ""),
decision_date=case.get("decision_date"),
chair_name=case.get("chair_name", ""),
district="ירושלים",
practice_area=case.get("practice_area", ""),
appeal_subtype=case.get("appeal_subtype", ""),
text=final_text,
)
result["internal_corpus_ingested"] = True
except Exception as e:
logger.warning("ingest_final_version: internal corpus ingestion failed (non-fatal): %s", e)
result["internal_corpus_ingested"] = False
return json.dumps(result, default=str, ensure_ascii=False, indent=2)
# ── Chair feedback tools ────────────────────────────────────────── # ── Chair feedback tools ──────────────────────────────────────────

View File

@@ -293,18 +293,25 @@ description: This skill should be used when writing legal decisions (החלטו
### 7.5 שלושה מקורות פסיקה — אל תבלבל ### 7.5 שלושה מקורות פסיקה — אל תבלבל
המערכת מפרידה בין שלושה קורפוסי פסיקה. כל אחד מהם משמש למטרה אחרת ויש כלי MCP נפרד לחיפוש בו: המערכת מפרידה בין **ארבעה** קורפוסי פסיקה. כל אחד מהם משמש למטרה אחרת ויש כלי MCP נפרד לחיפוש בו:
| קורפוס | טבלה | כלי חיפוש | תפקיד | | קורפוס | טבלה | כלי חיפוש | תפקיד |
|---|---|---|---| |---|---|---|---|
| תקדימי דפנה (סגנון) | `style_corpus` + `paragraph_embeddings` | `search_decisions` | החלטות שדפנה עצמה כתבה. מקור לסגנון, ניסוחים, ג'וריספרודנציה אישית. | | לימוד סגנון | `style_corpus` | (לא לחיפוש תוכן) | ממשק /training — ניתוח "הקול" של היו"ר: טון, ביטויי מעבר, מבנה פסקאות. **אין לחפש כאן תוכן משפטי.** |
| ספריית הפסיקה הסמכותית | `case_law` (`source_kind='external_upload'`) + `halachot` | `search_precedent_library` | פסיקה חיצונית מחייבת — עליון, מנהלי, ועדות ערר אחרות — עם הלכות שאושרו ע"י דפנה. **המקור היחיד לציטוטים בבלוק י לפי CREAC.** | | החלטות ועדות ערר | `case_law` (`source_kind='internal_committee'`) + `halachot` | `search_internal_decisions` | **כל** ועדות הערר לתכנון ובנייה (כל המחוזות). מסונן לפי `district` ו-`chair_name`. מקור לעקביות פנימית ופרקטיקה ארצית. |
| ציטוטים שצורפו ידנית | `case_precedents` | `precedent_search_library` | quotes שדפנה צירפה לתיק ספציפי בעבר. דומה לקורפוס סמכותי אך פר-תיק, ידני, לא עוברת חילוץ הלכות. | | פסיקת בתי משפט | `case_law` (`source_kind='external_upload'`) + `halachot` | `search_precedent_library` | בתי משפט: עליון, מנהלי, בג"ץ. **המקור היחיד לציטוטים מחייבים בבלוק י לפי CREAC.** |
| ציטוטים ידניים | `case_precedents` | `precedent_search_library` | quotes שצורפו לתיק ספציפי בעבר. פר-תיק, ידני. |
**הזרימה הסטנדרטית בבלוק י:** **הזרימה הסטנדרטית בבלוק י — חפש במקביל:**
1. `search_decisions` קודם — בדוק אם דפנה כבר הכריעה בסוגיה דומה (חיסכון דוקטרינרי / הבחנה). 1. `search_internal_decisions(district="ירושלים")` — האם ועדת ערר ירושלים הכריעה בסוגיה? (עקביות פנימית)
2. `search_precedent_library` — חפש את הכלל המחייב והציטוט התומך לפסקת CREAC. - אם יש תוצאה רלוונטית: הצג תחת **"החלטות ועדת ערר ירושלים"** והתייחס לה בניתוח.
3. אם הצדדים הפנו לפסיקה שלא בקורפוס — דפנה מעלה אותה דרך `/precedents` ב-UI; חילוץ ההלכות אוטומטי וההלכות מחכות לאישורה. 2. `search_internal_decisions()` (ריק = כל המחוזות) — פרקטיקה ארצית של ועדות אחרות.
- הצג תחת **"החלטות ועדות ערר אחרות"** — כמשל/השוואה, לא כמחייב.
3. `search_precedent_library` — כלל מחייב מבית משפט לפסקת CREAC.
- הצג תחת **"פסיקת בתי משפט"** — זה המקור לציטוט מחייב.
4. אם הצדדים הפנו לפסיקה שלא בקורפוס — דפנה מעלה אותה דרך `/precedents` ב-UI.
**חשוב:** החלטות ועדת ערר הן פרקטיקה, לא מחייב. ציטוט מחייב בבלוק י מגיע רק מ-`search_precedent_library`.
**איסור על המצאת ציטוטים** — ציטוט פסיקה חייב להגיע מאחד מהקורפוסים. אם אין הלכה מאושרת תומכת בנקודה — אל תמציא; ציין שהנושא דורש הוספת פסיקה לקורפוס. **איסור על המצאת ציטוטים** — ציטוט פסיקה חייב להגיע מאחד מהקורפוסים. אם אין הלכה מאושרת תומכת בנקודה — אל תמציא; ציין שהנושא דורש הוספת פסיקה לקורפוס.

View File

@@ -4395,6 +4395,148 @@ async def precedent_queue_pending(kind: str = "metadata", limit: int = 20):
return {"items": items, "count": len(items)} return {"items": items, "count": len(items)}
from legal_mcp.services import internal_decisions as int_decisions_service # noqa: E402
# Strong references to in-flight ingestion tasks. The event loop itself only
# keeps weak references to tasks, so a fire-and-forget task could otherwise be
# garbage-collected before it finishes.
_internal_upload_tasks: set = set()


@app.post("/api/internal-decisions/upload")
async def internal_decisions_upload(
    file: UploadFile = File(...),
    case_number: str = Form(...),
    case_name: str = Form(""),
    court: str = Form(""),
    decision_date: str = Form(""),
    chair_name: str = Form(""),
    district: str = Form(""),
    practice_area: str = Form(""),
    appeal_subtype: str = Form(""),
    subject_tags: str = Form("[]"),
    is_binding: bool = Form(True),
    summary: str = Form(""),
):
    """Upload a planning appeals-committee decision to the internal corpus.

    Validates the form, streams the upload into ``UPLOAD_DIR`` (rejecting
    files larger than ``MAX_FILE_SIZE`` with 413), then runs the ingestion as
    a background task whose state is published through ``_progress`` under the
    returned ``task_id``. The staged file is removed when ingestion ends,
    whether it succeeded or failed.

    ``subject_tags`` is expected to be a JSON list; anything else is treated
    as "no tags".

    Returns:
        ``{"task_id": "<uuid>"}``.

    Raises:
        HTTPException: 400 for an unknown practice area, blank case number or
            unsupported file type; 413 when the file is too large.
    """
    if practice_area and practice_area not in _PRACTICE_AREAS:
        raise HTTPException(400, "practice_area לא תקין")
    if not case_number.strip():
        raise HTTPException(400, "case_number חובה")

    # The client controls the filename: keep only its last path component so
    # it can never point outside UPLOAD_DIR or into a missing sub-directory.
    original_name = Path(file.filename or "").name
    suffix = Path(original_name).suffix.lower()
    if suffix not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, f"סוג קובץ לא נתמך: {suffix}")

    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    staged = UPLOAD_DIR / f"int_{uuid4().hex[:8]}_{original_name}"
    size = 0
    try:
        with staged.open("wb") as out:
            while chunk := await file.read(1024 * 1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "קובץ גדול מדי")
                out.write(chunk)
    except Exception:
        # Never leave a partial upload behind (too large, I/O error, ...).
        staged.unlink(missing_ok=True)
        raise

    try:
        tags = json.loads(subject_tags) if subject_tags else []
        if not isinstance(tags, list):
            tags = []
    except json.JSONDecodeError:
        tags = []

    task_id = str(uuid4())
    await _progress.set(task_id, {
        "status": "queued", "filename": file.filename or "",
        "stage": "queued", "percent": 0,
    })

    async def _run():
        try:
            await int_decisions_service.ingest_internal_decision(
                case_number=case_number.strip(),
                case_name=case_name.strip(),
                court=court.strip(),
                decision_date=decision_date or None,
                chair_name=chair_name.strip(),
                district=district.strip(),
                practice_area=practice_area,
                appeal_subtype=appeal_subtype.strip(),
                subject_tags=tags,
                is_binding=is_binding,
                summary=summary.strip(),
                file_path=staged,
            )
            await _progress.set(task_id, {"status": "completed", "percent": 100})
        except Exception as e:
            logger.exception("internal-decisions upload failed")
            await _progress.set(task_id, {"status": "failed", "error": str(e)})
        finally:
            # The ingest service keeps its own copy; the staging file is
            # always disposable.
            staged.unlink(missing_ok=True)

    task = asyncio.create_task(_run())
    _internal_upload_tasks.add(task)
    task.add_done_callback(_internal_upload_tasks.discard)
    return {"task_id": task_id}
@app.post("/api/internal-decisions/migrate")
async def internal_decisions_migrate(
    source: str = "both",
    dry_run: bool = True,
):
    """Run the data migrations into the internal committee corpus.

    Args:
        source: ``'style_corpus'``, ``'external_corpus'`` or ``'both'``.
        dry_run: When true, only report what would be done (no writes).

    Returns:
        A dict keyed by migration name holding each migration's result.

    Raises:
        HTTPException: 400 when *source* is not one of the accepted values.
    """
    # Insertion order is the execution order: style corpus first.
    runners = {
        "style_corpus": int_decisions_service.migrate_from_style_corpus,
        "external_corpus": int_decisions_service.migrate_from_external_corpus,
    }
    if source != "both" and source not in runners:
        raise HTTPException(400, "source חייב להיות style_corpus / external_corpus / both")

    selected = runners if source == "both" else {source: runners[source]}
    results: dict = {}
    for name, runner in selected.items():
        results[name] = await runner(dry_run=dry_run)
    return results
@app.get("/api/internal-decisions")
async def internal_decisions_list(
    district: str = "",
    chair_name: str = "",
    practice_area: str = "",
    limit: int = 100,
):
    """List internal committee decisions with optional exact-match filters.

    Args:
        district: Restrict to this district; empty means no restriction.
        chair_name: Restrict to this chair; empty means no restriction.
        practice_area: Restrict to this practice area; empty means no
            restriction.
        limit: Maximum number of rows to return; must not be negative.

    Returns:
        ``{"total": <int>, "items": [...]}``. ``items`` is ordered by date,
        newest first, with undated rows last. ``total`` counts every
        ``internal_committee`` row and does not take the filters into account.

    Raises:
        HTTPException: 400 when *limit* is negative (PostgreSQL rejects a
            negative LIMIT, which previously surfaced as a server error).
    """
    if limit < 0:
        raise HTTPException(400, "limit חייב להיות מספר אי-שלילי")

    conditions = ["source_kind = 'internal_committee'"]
    params: list = []
    # Column names come from this fixed tuple, never from the request, so
    # interpolating them into the SQL text is safe; values are always bound.
    for column, value in (
        ("district", district),
        ("chair_name", chair_name),
        ("practice_area", practice_area),
    ):
        if value:
            params.append(value)
            conditions.append(f"{column} = ${len(params)}")
    params.append(limit)
    where = " AND ".join(conditions)

    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            f"SELECT id, case_number, case_name, court, district, chair_name, "
            f"date, practice_area, appeal_subtype, extraction_status, halacha_extraction_status "
            f"FROM case_law WHERE {where} ORDER BY date DESC NULLS LAST LIMIT ${len(params)}",
            *params,
        )
        total = await conn.fetchval(
            "SELECT COUNT(*) FROM case_law WHERE source_kind = 'internal_committee'"
        )
    return {
        "total": total,
        "items": [dict(r) for r in rows],
    }
@app.get("/api/halachot") @app.get("/api/halachot")
async def halachot_list( async def halachot_list(
case_law_id: str = "", case_law_id: str = "",