feat(precedents): metadata auto-fill, edit sheet, persuasive extraction
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m28s

Three improvements to the precedent library based on usage feedback:

1. Auto-fill metadata at upload time. New service
   precedent_metadata_extractor reads the ruling's full_text and
   suggests case_name (short), summary, headnote, key_quote,
   subject_tags, appeal_subtype. The merge policy fills only empty
   fields, preserving everything the chair typed in the upload form.
   Wired into the ingest pipeline; also exposed as a re-run endpoint
   POST /api/precedent-library/{id}/extract-metadata for existing
   records.

2. Edit sheet in the UI. Pencil icon on each library row opens a
   pre-populated form covering every field. A Sparkles button on the
   sheet runs the metadata extractor on demand and refreshes the
   form. The case_number is read-only because halachot are FK'd to
   it; renaming requires delete + re-upload.

3. Halacha extractor branches on is_binding. Sources marked binding
   (Supreme/Administrative) keep the strict halacha prompt. Non-binding
   sources (other appeals committees, district courts on planning
   matters) get a different prompt that extracts applications,
   interpretive principles, and persuasive conclusions — labeled with
   new rule_types 'application' and 'persuasive'. Chunk selection
   also gains a fallback: if the chunker labeled nothing as
   legal_analysis/ruling/conclusion, we now run on all chunks rather
   than returning zero halachot for a usable ruling.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 10:19:35 +00:00
parent b51163b67c
commit 73a79ea7e8
10 changed files with 841 additions and 21 deletions

View File

@@ -28,6 +28,7 @@ from legal_mcp.services import (
embeddings,
extractor,
halacha_extractor,
precedent_metadata_extractor,
)
logger = logging.getLogger(__name__)
@@ -188,16 +189,27 @@ async def ingest_precedent(
]
stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts)
await progress("extracting_halachot", 75, "מחלץ הלכות מחייבות")
await db.set_case_law_extraction_status(case_law_id, "completed")
await progress("extracting_metadata", 65, "מחלץ מטא-דאטה (תקציר, תגיות)")
try:
metadata_result = await precedent_metadata_extractor.extract_and_apply(
case_law_id,
)
except Exception as e:
logger.warning("metadata extraction failed (non-fatal): %s", e)
metadata_result = {"status": "failed", "fields": []}
await progress("extracting_halachot", 80, "מחלץ הלכות / יישומים")
halacha_result = await halacha_extractor.extract(case_law_id)
await progress(
"completed",
100,
msg = (
f"הוכנס לספרייה: {stored_chunks} chunks, "
f"{halacha_result.get('stored', 0)} הלכות ממתינות לאישור",
f"{halacha_result.get('stored', 0)} פריטים ממתינים לאישור"
)
if metadata_result.get("fields"):
msg += f"; מולאו אוטומטית: {', '.join(metadata_result['fields'])}"
await progress("completed", 100, msg)
return {
"status": "completed",
@@ -206,6 +218,7 @@ async def ingest_precedent(
"halachot": halacha_result.get("stored", 0),
"halachot_extracted_raw": halacha_result.get("extracted", 0),
"halachot_verified": halacha_result.get("verified", 0),
"metadata_filled": metadata_result.get("fields", []),
"pages": page_count,
}
@@ -239,6 +252,36 @@ async def reextract_halachot(
return result
async def reextract_metadata(
    case_law_id: UUID | str,
    progress: ProgressCb | None = None,
) -> dict:
    """Run metadata extraction again for a precedent that is already stored.

    The extractor only fills fields that are still empty (subject_tags,
    summary, headnote, key_quote, appeal_subtype, plus case_name when it
    is identical to the citation); anything the user entered is kept.

    Args:
        case_law_id: Identifier of the precedent, as a UUID or its string
            form (strings are converted with ``UUID(...)``).
        progress: Optional progress callback awaited with
            ``(stage, percent, message)``; a no-op is used when omitted.

    Returns:
        The dict produced by
        ``precedent_metadata_extractor.extract_and_apply``.

    Raises:
        ValueError: If no record exists for the id, if its ``source_kind``
            is not ``"external_upload"``, or if a string id is not a
            valid UUID.
    """
    notify = progress or _noop_progress
    precedent_id = (
        UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
    )

    # Only chair-uploaded precedents may be re-processed.
    row = await db.get_case_law(precedent_id)
    is_chair_upload = bool(row) and row.get("source_kind") == "external_upload"
    if not is_chair_upload:
        raise ValueError("precedent not found or not chair-uploaded")

    await notify("extracting_metadata", 40, "מחלץ מטא-דאטה (תקציר, תגיות)")
    outcome = await precedent_metadata_extractor.extract_and_apply(precedent_id)

    # Summarise which fields were auto-filled, or say that none were.
    filled = outcome.get("fields") or []
    if filled:
        summary = f"מולאו {len(filled)} שדות: {', '.join(filled)}"
    else:
        summary = "לא נמצא מה למלא (כל השדות מאוכלסים או לא ניתן לחלץ)"

    await notify("completed", 100, summary)
    return outcome
async def delete_precedent(case_law_id: UUID | str) -> bool:
"""Delete a precedent and cascade chunks + halachot."""
if isinstance(case_law_id, str):