feat: add internal committee decisions corpus (source_kind='internal_committee')
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m31s

Three-layer separation: style learning (style_corpus), appeals-committee decisions
(internal_committee), and court rulings (external_upload).

- SCHEMA_V10: chair_name + district columns on case_law, chair_name column on cases; index on case_law.source_kind plus partial indexes on chair_name and district
- create_internal_committee_decision() DB upsert function
- search_precedent_library_semantic() now accepts source_kind/district/chair_name params
- search_precedent_library_hybrid() passes through new params
- services/internal_decisions.py: ingest_internal_decision, migrate_from_style_corpus,
  migrate_from_external_corpus (identifies rows via source_type='appeals_committee')
- search_internal_decisions() MCP tool (server.py + tools/search.py)
- internal_decision_migrate() MCP admin tool
- Web endpoints: POST /api/internal-decisions/upload, POST /api/internal-decisions/migrate,
  GET /api/internal-decisions
- ingest_final_version auto-ingests finalized decisions into internal corpus
- SKILL.md updated: agents now search internal + external in parallel, present separately

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 18:33:39 +00:00
parent 1b14e04373
commit 92a2763b86
8 changed files with 718 additions and 15 deletions

View File

@@ -691,6 +691,16 @@ CREATE INDEX IF NOT EXISTS idx_prec_img_emb_case_law
"""
# Schema v10 migration.
#   * case_law: adds TEXT columns chair_name and district (default '').
#   * cases:    adds TEXT column chair_name (default '').
#   * Indexes on case_law: a plain index on source_kind, and partial indexes
#     on chair_name / district that skip rows where the value is ''.
# Every statement uses IF NOT EXISTS, so executing the script again does not
# fail on objects that are already present.
# NOTE(review): `cases` receives chair_name only; no district column is added
# to `cases` here — confirm that is intended.
SCHEMA_V10_SQL = """
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS chair_name TEXT DEFAULT '';
ALTER TABLE case_law ADD COLUMN IF NOT EXISTS district TEXT DEFAULT '';
ALTER TABLE cases ADD COLUMN IF NOT EXISTS chair_name TEXT DEFAULT '';
CREATE INDEX IF NOT EXISTS idx_case_law_source_kind ON case_law(source_kind);
CREATE INDEX IF NOT EXISTS idx_case_law_chair ON case_law(chair_name) WHERE chair_name <> '';
CREATE INDEX IF NOT EXISTS idx_case_law_district ON case_law(district) WHERE district <> '';
"""
async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
async with pool.acquire() as conn:
await conn.execute(SCHEMA_SQL)
@@ -703,7 +713,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
await conn.execute(SCHEMA_V7_SQL)
await conn.execute(SCHEMA_V8_SQL)
await conn.execute(SCHEMA_V9_SQL)
logger.info("Database schema initialized (v1-v9)")
await conn.execute(SCHEMA_V10_SQL)
logger.info("Database schema initialized (v1-v10)")
async def init_schema() -> None:
@@ -1817,6 +1828,85 @@ async def create_external_case_law(
return _row_to_case_law(row)
async def create_internal_committee_decision(
    case_number: str,
    case_name: str,
    full_text: str,
    court: str = "",
    decision_date: date | None = None,
    chair_name: str = "",
    district: str = "",
    practice_area: str = "",
    appeal_subtype: str = "",
    subject_tags: list[str] | None = None,
    summary: str = "",
    is_binding: bool = True,
    document_id: UUID | None = None,
) -> dict:
    """Insert or refresh an appeals-committee decision in ``case_law``.

    The row is looked up by ``case_number``. When a row with that number is
    found (whatever its current ``source_kind``), it is updated in place;
    otherwise a new row is inserted. In both cases the row ends up with
    ``source_kind='internal_committee'``, ``source_type='appeals_committee'``,
    ``extraction_status='processing'`` and
    ``halacha_extraction_status='pending'``.

    Update semantics for an existing row:
      * ``case_name``, ``practice_area``, ``appeal_subtype``,
        ``subject_tags``, ``full_text`` and ``is_binding`` are always
        overwritten with the supplied values.
      * ``court``, ``chair_name``, ``district`` and ``summary`` are only
        replaced when the supplied value is a non-empty string.
      * ``date`` and ``document_id`` are only replaced when the supplied
        value is not ``None``.

    ``subject_tags`` is sent to the database as a JSON-encoded string
    (``None`` is encoded as an empty list).

    Returns:
        The stored row converted by ``_row_to_case_law``.
    """
    lookup_sql = "SELECT id FROM case_law WHERE case_number = $1"

    # Refresh path: $1 is the existing row id, $2..$13 are the new values.
    update_sql = """
        UPDATE case_law SET
            case_name = $2,
            court = COALESCE(NULLIF($3, ''), court),
            date = COALESCE($4, date),
            chair_name = COALESCE(NULLIF($5, ''), chair_name),
            district = COALESCE(NULLIF($6, ''), district),
            practice_area = $7,
            appeal_subtype = $8,
            subject_tags = $9,
            summary = COALESCE(NULLIF($10, ''), summary),
            full_text = $11,
            source_type = 'appeals_committee',
            source_kind = 'internal_committee',
            is_binding = $12,
            document_id = COALESCE($13, document_id),
            extraction_status = 'processing',
            halacha_extraction_status = 'pending'
        WHERE id = $1
        RETURNING *
        """

    # Create path: placeholders follow the column list order, with the
    # constant source/status values written inline.
    insert_sql = """
        INSERT INTO case_law (
            case_number, case_name, court, date, chair_name, district,
            subject_tags, summary, full_text,
            source_kind, source_type, document_id,
            extraction_status, halacha_extraction_status,
            practice_area, appeal_subtype, is_binding
        ) VALUES (
            $1, $2, $3, $4, $5, $6,
            $7, $8, $9,
            'internal_committee', 'appeals_committee', $10,
            'processing', 'pending',
            $11, $12, $13
        )
        RETURNING *
        """

    db_pool = await get_pool()
    encoded_tags = json.dumps(subject_tags or [], ensure_ascii=False)

    async with db_pool.acquire() as conn:
        match_row = await conn.fetchrow(lookup_sql, case_number)

        if not match_row:
            statement = insert_sql
            arguments = (
                case_number,
                case_name,
                court,
                decision_date,
                chair_name,
                district,
                encoded_tags,
                summary,
                full_text,
                document_id,
                practice_area,
                appeal_subtype,
                is_binding,
            )
        else:
            statement = update_sql
            arguments = (
                match_row["id"],
                case_name,
                court,
                decision_date,
                chair_name,
                district,
                practice_area,
                appeal_subtype,
                encoded_tags,
                summary,
                full_text,
                is_binding,
                document_id,
            )

        saved_row = await conn.fetchrow(statement, *arguments)

    return _row_to_case_law(saved_row)
async def update_case_law(case_law_id: UUID, **fields) -> dict | None:
"""Patch metadata fields on a case_law row.
@@ -2194,8 +2284,14 @@ async def search_precedent_library_semantic(
subject_tag: str = "",
limit: int = 10,
include_halachot: bool = True,
source_kind: str = "external_upload",
district: str = "",
chair_name: str = "",
) -> list[dict]:
"""Semantic search over chair-uploaded precedents.
"""Semantic search over precedents filtered by source_kind.
source_kind='external_upload' → court rulings (default)
source_kind='internal_committee' → appeals-committee decisions
Returns merged halachot + chunks. Halachot are pre-distilled rules, so
they get a small score boost. Only ``approved`` / ``published`` halachot
@@ -2204,7 +2300,7 @@ async def search_precedent_library_semantic(
"""
pool = await get_pool()
halacha_filters = ["h.review_status IN ('approved', 'published')"]
chunk_filters = ["cl.source_kind = 'external_upload'"]
chunk_filters = [f"cl.source_kind = '{source_kind}'"]
h_params: list = [query_embedding, limit]
c_params: list = [query_embedding, limit]
h_idx = 3
@@ -2249,13 +2345,27 @@ async def search_precedent_library_semantic(
halacha_filters.append(f"${h_idx} = ANY(h.subject_tags)")
h_params.append(subject_tag)
h_idx += 1
if district:
halacha_filters.append(f"cl.district = ${h_idx}")
h_params.append(district)
h_idx += 1
chunk_filters.append(f"cl.district = ${c_idx}")
c_params.append(district)
c_idx += 1
if chair_name:
halacha_filters.append(f"cl.chair_name = ${h_idx}")
h_params.append(chair_name)
h_idx += 1
chunk_filters.append(f"cl.chair_name = ${c_idx}")
c_params.append(chair_name)
c_idx += 1
halacha_sql = f"""
SELECT h.id AS halacha_id, h.case_law_id, h.rule_statement,
h.reasoning_summary, h.supporting_quote, h.page_reference,
h.practice_areas, h.subject_tags, h.confidence, h.rule_type,
cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
cl.precedent_level,
cl.precedent_level, cl.chair_name, cl.district,
1 - (h.embedding <=> $1) AS score
FROM halachot h
JOIN case_law cl ON cl.id = h.case_law_id
@@ -2269,7 +2379,7 @@ async def search_precedent_library_semantic(
SELECT pc.id AS chunk_id, pc.case_law_id, pc.content,
pc.section_type, pc.page_number,
cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
cl.precedent_level, cl.practice_area,
cl.precedent_level, cl.practice_area, cl.chair_name, cl.district,
1 - (pc.embedding <=> $1) AS score
FROM precedent_chunks pc
JOIN case_law cl ON cl.id = pc.case_law_id