feat(migration): enrich internal committee entries — fix case_number + metadata + halachot
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m32s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m32s
- precedent_metadata_extractor: add case_number_clean extraction field
- apply_to_record: overwrite_case_number param for one-time migration
- internal_decisions: enrich_migrated_entries() — runs metadata then queues halachot
- server: expose as internal_decision_enrich MCP tool

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -285,6 +285,68 @@ async def migrate_from_external_corpus(dry_run: bool = False) -> dict:
|
||||
return results
|
||||
|
||||
|
||||
async def enrich_migrated_entries(dry_run: bool = False) -> dict:
    """One-time enrichment: run metadata extraction + halacha extraction on all
    internal_committee entries that are waiting (halacha_status='pending',
    metadata never requested).

    Metadata extraction will:
      - Fix case_number from the decision header text
      - Fill case_name from the parties line
      - Fill date if missing

    Halacha extraction queues the LLM-based halacha extraction job.

    Args:
        dry_run: When True, only count the matching rows; nothing is
            extracted or queued.

    Returns:
        A summary dict with the keys:
          - "total": number of rows selected for enrichment.
          - "metadata_updated": rows whose metadata extraction reported
            status "completed" or "no_changes".
          - "halachot_queued": rows successfully queued for halacha extraction.
          - "failed": rows for which either the metadata step or the halacha
            queueing step raised. A row is counted at most once.
          - "dry_run": echo of the ``dry_run`` argument.
    """
    from legal_mcp.services import precedent_metadata_extractor, db as _db

    pool = await _db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT id, case_number
               FROM case_law
               WHERE source_kind = 'internal_committee'
                 AND halacha_extraction_status = 'pending'
                 AND metadata_extraction_requested_at IS NULL
               ORDER BY created_at"""
        )

    results = {
        "total": len(rows),
        "metadata_updated": 0,
        "halachot_queued": 0,
        "failed": 0,
        "dry_run": dry_run,
    }

    if dry_run:
        return results

    for row in rows:
        case_law_id = row["id"]
        # NOTE: this is the case_number as stored *before* enrichment; the
        # metadata step is asked to overwrite it, so log lines identify the
        # row by its old value.
        case_number = row["case_number"]

        try:
            meta = await precedent_metadata_extractor.extract_and_apply(
                case_law_id, overwrite_case_number=True
            )
        except Exception as e:
            # Best-effort batch: record the failure and move on to the next
            # row. Halacha extraction is not queued for a row whose metadata
            # step raised.
            logger.exception("enrich_migrated metadata failed for %s: %s", case_number, e)
            results["failed"] += 1
            continue

        if meta.get("status") in ("completed", "no_changes"):
            results["metadata_updated"] += 1
        logger.info(
            "enrich_migrated: %s → fields=%s",
            case_number, meta.get("fields"),
        )

        try:
            await _db.request_halacha_extraction(case_law_id)
        except Exception as e:
            logger.exception("enrich_migrated halacha queue failed for %s: %s", case_number, e)
            # Count the failure so the returned summary does not silently
            # under-report rows that were never queued.
            results["failed"] += 1
        else:
            results["halachot_queued"] += 1

    return results
|
||||
|
||||
|
||||
async def search_internal(
|
||||
query: str,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user