feat(precedents): metadata auto-fill, edit sheet, persuasive extraction
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m28s

Three improvements to the precedent library based on usage feedback:

1. Auto-fill metadata at upload time. New service
   precedent_metadata_extractor reads the ruling's full_text and
   suggests case_name (short), summary, headnote, key_quote,
   subject_tags, appeal_subtype. The merge policy fills only empty
   fields (case_name also counts as empty when it merely repeats the
   case_number), preserving everything else the chair typed in the
   upload form.
   Wired into the ingest pipeline; also exposed as a re-run endpoint
   POST /api/precedent-library/{id}/extract-metadata for existing
   records.

2. Edit sheet in the UI. Pencil icon on each library row opens a
   pre-populated form covering every field. A Sparkles button on the
   sheet runs the metadata extractor on demand and refreshes the
   form. The case_number is read-only because halachot are FK'd to
   it; renaming requires delete + re-upload.

3. Halacha extractor branches on is_binding. Sources marked binding
   (Supreme/Administrative) keep the strict halacha prompt. Non-binding
   sources (other appeals committees, district courts on planning
   matters) get a different prompt that extracts applications,
   interpretive principles, and persuasive conclusions — labeled with
   new rule_types 'application' and 'persuasive'. The fallback also
   widens chunk selection: if the chunker labeled nothing as
   legal_analysis/ruling/conclusion, we now run on all chunks rather
   than returning zero halachot for a usable ruling.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 10:19:35 +00:00
parent b51163b67c
commit 73a79ea7e8
10 changed files with 841 additions and 21 deletions

View File

@@ -1089,13 +1089,79 @@
"priority": "medium",
"subtasks": [],
"updatedAt": "2026-05-03T08:36:24.711Z"
},
{
"id": "9",
"title": "Service: precedent_metadata_extractor.py",
"description": "LLM-based extractor that auto-fills empty metadata fields after upload: short case_name (e.g. 'אהרון ברק' from long citation), summary (2-3 sentences), headnote, key_quote, subject_tags array, appeal_subtype. Reuses claude_session.query_json. Returns dict; caller decides which empty fields to merge (never overrides user values).",
"details": "",
"testStrategy": "",
"status": "done",
"dependencies": [],
"priority": "high",
"subtasks": [],
"updatedAt": "2026-05-03T10:19:15.105Z"
},
{
"id": "10",
"title": "Halacha extractor: dual mode (binding vs persuasive)",
"description": "Update halacha_extractor.py prompt to branch on is_binding: binding=true → strict halacha extraction (current). binding=false → extract reasoning principles, applications of established halachot, persuasive conclusions. New rule_types: 'application' (applying known rule to facts), 'persuasive' (committee's reasoning citable as authority). Schema unchanged (rule_type already TEXT).",
"details": "",
"testStrategy": "",
"status": "done",
"dependencies": [],
"priority": "high",
"subtasks": [],
"updatedAt": "2026-05-03T10:19:15.117Z"
},
{
"id": "11",
"title": "Ingest pipeline: add metadata extraction stage",
"description": "In services/precedent_library.py:ingest_precedent, after halacha extraction, run metadata_extractor and PATCH the case_law row with auto-filled fields (only those left empty by user). Publish progress 'extracting_metadata'.",
"details": "",
"testStrategy": "",
"status": "done",
"dependencies": [
"9"
],
"priority": "high",
"subtasks": [],
"updatedAt": "2026-05-03T10:19:15.128Z"
},
{
"id": "12",
"title": "UI: precedent edit sheet",
"description": "Add edit button to library-list-panel rows that opens a Sheet with all editable fields (case_name, citation, court, date, practice_area, appeal_subtype, subject_tags, summary, headnote, key_quote, source_type, precedent_level, is_binding). Pre-populated from current values. Submit calls PATCH /api/precedent-library/{id} via useUpdatePrecedent. After save, invalidate library list query.",
"details": "",
"testStrategy": "",
"status": "done",
"dependencies": [],
"priority": "high",
"subtasks": [],
"updatedAt": "2026-05-03T10:19:15.134Z"
},
{
"id": "13",
"title": "Test on 403-17: fix metadata + re-extract",
"description": "After deploy: PATCH 403-17 to set case_name='ערר 403/17', then trigger precedent_extract_halachot to test the dual-mode extraction on a non-binding committee decision.",
"details": "",
"testStrategy": "",
"status": "pending",
"dependencies": [
"9",
"10",
"11",
"12"
],
"priority": "medium",
"subtasks": []
}
],
"metadata": {
"version": "1.0.0",
"lastModified": "2026-05-03T08:36:24.711Z",
"taskCount": 8,
"completedCount": 8,
"lastModified": "2026-05-03T10:19:15.134Z",
"taskCount": 13,
"completedCount": 12,
"tags": [
"legal-ai"
]

View File

@@ -210,6 +210,12 @@ async def precedent_extract_halachot(case_law_id: str) -> str:
return await plib.precedent_extract_halachot(case_law_id)
@mcp.tool()
async def precedent_extract_metadata(case_law_id: str) -> str:
    """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. ממלא רק שדות ריקים."""
    # NOTE(review): the docstring is left untranslated on purpose — it is
    # presumably surfaced by the MCP framework as the tool description, which
    # would make it runtime text. Confirm before changing it.
    #
    # Thin wrapper: the implementation lives in plib; this function only
    # registers it as an MCP tool and passes the id through unchanged.
    outcome = await plib.precedent_extract_metadata(case_law_id)
    return outcome
@mcp.tool()
async def search_precedent_library(
query: str,

View File

@@ -41,7 +41,23 @@ CHUNK_RETRY_ATTEMPTS = 1
EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion")
HALACHA_EXTRACTION_PROMPT = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית.
# Two prompts — choose by source's is_binding flag.
#
# The binding prompt extracts strict halachot (rules a future panel MUST
# follow). It rejects obiter dicta, factual findings, and citations of
# other rulings that the present court only mentioned in passing.
#
# The persuasive prompt is for sources that don't establish binding law
# (most appeals committee decisions, district courts on planning matters,
# etc.). For those, the value is in **how the panel reasoned and applied**
# established law to facts — not in new halachot. The user explicitly
# wants to be able to cite "another committee reached the same conclusion"
# even though it is not binding.
#
# The schema's rule_type field accepts six values:
# binding | interpretive | procedural | obiter | application | persuasive
HALACHA_EXTRACTION_PROMPT_BINDING = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית של ערכאה עליונה (עליון / מנהלי).
## הגדרות מחייבות
@@ -94,8 +110,60 @@ HALACHA_EXTRACTION_PROMPT = """אתה משפטן בכיר המתמחה בדינ
"""
HALACHA_EXTRACTION_PROMPT_PERSUASIVE = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה. תפקידך: לחלץ עקרונות, יישומים ומסקנות מתוך החלטה של ועדת ערר אחרת או של בית משפט שאינו ערכאה עליונה לסוגיה.
## חשוב — מה לחלץ ומה לא
המקור הזה **אינו** מקור להלכות מחייבות חדשות (binding rules). הלכות מחייבות מגיעות מהעליון/מנהלי. עם זאת, יש כאן ערך משמעותי שצריך לחלץ — איך הפנל הזה ניתח ויישם את הדין הקיים. כשנכתוב החלטה עתידית, נצטט מהמקור הזה כ"גם ועדת הערר ב-X הגיעה למסקנה דומה" — לא כסמכות מחייבת, אלא כתמיכה משכנעת.
**יש לחלץ:**
- **יישום של הלכה ידועה** (rule_type=`application`) — הפנל החיל הלכה ידועה (של עליון/מנהלי) על עובדות הנידונות. תצטט את ניסוח הכלל **כפי שהוצג כאן** (לא בהכרח כפי שנקבע במקור) ואת התוצאה.
- **עקרון פרשני שאומץ** (rule_type=`interpretive`) — איך הפנל פירש סעיף חוק / תכנית, באופן שניתן לאמץ.
- **כלל פרוצדורלי** (rule_type=`procedural`) — קביעות בנושאי סמכות, מועדים, הליך.
- **מסקנה מנומקת ומשכנעת** (rule_type=`persuasive`) — מסקנה שלמה של הפנל בסוגיה, עם ההיגיון התומך, ניתנת לציטוט כאסמכתא משכנעת.
**אין לחלץ:**
- ממצאים עובדתיים ספציפיים לתיק ("העורר לא הוכיח X").
- ציטוטים מפסקי דין אחרים ללא ניתוח של הפנל.
- אמרות אגב חסרות חשיבות.
## תחומים אפשריים (practice_areas) — תחומי ועדת הערר בלבד
- rishuy_uvniya — רישוי ובניה (תיקי 1xxx: היתרים, שימוש חורג, תכניות, קווי בניין, גובה, חניה)
- betterment_levy — היטל השבחה (תיקי 8xxx: שומה, מערכות, תכניות המקנות בה, מועד קובע, סופיות ההחלטה)
- compensation_197 — פיצויים לפי ס' 197 (תיקי 9xxx: פגיעה במקרקעין, ירידת ערך, ס' 200/פטור)
## פלט נדרש
החזר JSON array בלבד, ללא markdown, ללא הסברים:
[
{
"rule_statement": "ניסוח הכלל / המסקנה / היישום בלשון משפטית מדויקת, 1-3 משפטים.",
"rule_type": "application",
"reasoning_summary": "תמצית ההיגיון של הפנל (1-2 משפטים).",
"supporting_quote": "ציטוט מילולי מדויק מהקלט שתומך בכלל. חייב להופיע מילה במילה.",
"page_reference": "פס' 12 / עמ' 8 — ככל שניתן לזהות.",
"practice_areas": ["betterment_levy"],
"subject_tags": ["מועד_קביעת_שומה", "תכנית_רחביה"],
"cites": ["עע\\"מ 3975/22"],
"confidence": 0.85
}
]
## כללי איכות
1. **נאמנות מוחלטת לציטוט** — supporting_quote חייב להיות הדבקה מדויקת מהקלט. אם אין ציטוט מתאים — אל תוסיף את ההלכה.
2. **מספר הלכות** — החלטה ארוכה של ועדת ערר יכולה להניב 2-8 פריטים (יישומים + מסקנות). אם אין מה לחלץ — החזר [].
3. **rule_type מדויק** — application = יישום הלכה ידועה. interpretive = פרשנות. procedural = פרוצדורה. persuasive = מסקנה כללית בעלת ערך כאסמכתא.
4. **לא לפצל יתר על המידה** — שני סעיפים זהים מבחינה רעיונית = פריט אחד.
5. **שפה** — עברית משפטית מקצועית, גוף שלישי.
6. **subject_tags** — 2-5 תגיות בעברית, snake_case.
7. **confidence** — 0..1. דייק.
"""
_VALID_PRACTICE_AREAS = {"rishuy_uvniya", "betterment_levy", "compensation_197"}
_VALID_RULE_TYPES = {"binding", "interpretive", "procedural", "obiter"}
_VALID_RULE_TYPES = {
"binding", "interpretive", "procedural", "obiter",
"application", "persuasive",
}
def _normalize_for_comparison(text: str) -> str:
@@ -135,10 +203,13 @@ def _verify_quote(supporting_quote: str, full_text: str) -> bool:
return False
def _coerce_halacha(raw: dict) -> dict | None:
def _coerce_halacha(raw: dict, is_binding: bool = True) -> dict | None:
"""Validate and normalize one LLM-returned halacha dict.
Returns ``None`` if the entry is missing required fields.
Returns ``None`` if the entry is missing required fields. ``is_binding``
only affects the default rule_type when the LLM returned an unknown
value — for binding sources we default to ``binding``, otherwise to
``persuasive`` (never pretend an appeals committee created halacha).
"""
if not isinstance(raw, dict):
return None
@@ -147,9 +218,13 @@ def _coerce_halacha(raw: dict) -> dict | None:
if not rule_statement or not supporting_quote:
return None
rule_type = (raw.get("rule_type") or "binding").strip().lower()
default_rule_type = "binding" if is_binding else "persuasive"
rule_type = (raw.get("rule_type") or default_rule_type).strip().lower()
if rule_type not in _VALID_RULE_TYPES:
rule_type = "binding"
rule_type = default_rule_type
# Guard: don't let a non-binding source produce 'binding' rule_type
if not is_binding and rule_type == "binding":
rule_type = "persuasive"
practice_areas_raw = raw.get("practice_areas") or []
if isinstance(practice_areas_raw, str):
@@ -191,11 +266,21 @@ async def _extract_chunk(
chunk_index: int,
chunk_total: int,
context: str,
is_binding: bool,
) -> list[dict]:
"""Run the halacha extractor on one chunk with retry."""
"""Run the halacha extractor on one chunk with retry.
The prompt branches on ``is_binding`` so that non-binding sources
(other appeals committees, district courts) yield application /
persuasive entries rather than a forced 0-result strict halacha pass.
"""
base_prompt = (
HALACHA_EXTRACTION_PROMPT_BINDING if is_binding
else HALACHA_EXTRACTION_PROMPT_PERSUASIVE
)
chunk_label = f" (חלק {chunk_index + 1}/{chunk_total})" if chunk_total > 1 else ""
prompt = (
f"{HALACHA_EXTRACTION_PROMPT}\n\n"
f"{base_prompt}\n\n"
f"## הקלט\n"
f"סוג קטע: {section_type}\n"
f"{context}{chunk_label}\n\n"
@@ -241,9 +326,24 @@ async def extract(case_law_id: UUID | str) -> dict:
if not record:
return {"status": "not_found", "extracted": 0, "stored": 0}
is_binding = bool(record.get("is_binding"))
# Try the targeted sections first (legal_analysis / ruling / conclusion).
# If the chunker labeled everything as 'other' (common when a ruling
# uses non-standard headings or the section markers aren't bracketed
# cleanly), fall back to ALL chunks — better to over-include than to
# silently skip a ruling that has reasoning under an unexpected label.
chunks = await db.list_precedent_chunks(
case_law_id, section_types=EXTRACTABLE_SECTIONS,
)
if not chunks:
chunks = await db.list_precedent_chunks(case_law_id)
if chunks:
logger.info(
"halacha_extractor: case_law=%s — no targeted sections, "
"falling back to all %d chunks",
case_law_id, len(chunks),
)
if not chunks:
await db.set_case_law_halacha_status(case_law_id, "completed")
return {"status": "no_chunks", "extracted": 0, "stored": 0}
@@ -262,7 +362,7 @@ async def extract(case_law_id: UUID | str) -> dict:
async with sem:
return await _extract_chunk(
chunk_row["content"], chunk_row["section_type"],
idx, len(chunks), context,
idx, len(chunks), context, is_binding,
)
chunk_results = await asyncio.gather(
@@ -281,7 +381,7 @@ async def extract(case_law_id: UUID | str) -> dict:
cleaned: list[dict] = []
for raw in raw_halachot:
coerced = _coerce_halacha(raw)
coerced = _coerce_halacha(raw, is_binding=is_binding)
if coerced is None:
continue
coerced["quote_verified"] = _verify_quote(

View File

@@ -28,6 +28,7 @@ from legal_mcp.services import (
embeddings,
extractor,
halacha_extractor,
precedent_metadata_extractor,
)
logger = logging.getLogger(__name__)
@@ -188,16 +189,27 @@ async def ingest_precedent(
]
stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts)
await progress("extracting_halachot", 75, "מחלץ הלכות מחייבות")
await db.set_case_law_extraction_status(case_law_id, "completed")
await progress("extracting_metadata", 65, "מחלץ מטא-דאטה (תקציר, תגיות)")
try:
metadata_result = await precedent_metadata_extractor.extract_and_apply(
case_law_id,
)
except Exception as e:
logger.warning("metadata extraction failed (non-fatal): %s", e)
metadata_result = {"status": "failed", "fields": []}
await progress("extracting_halachot", 80, "מחלץ הלכות / יישומים")
halacha_result = await halacha_extractor.extract(case_law_id)
await progress(
"completed",
100,
msg = (
f"הוכנס לספרייה: {stored_chunks} chunks, "
f"{halacha_result.get('stored', 0)} הלכות ממתינות לאישור",
f"{halacha_result.get('stored', 0)} פריטים ממתינים לאישור"
)
if metadata_result.get("fields"):
msg += f"; מולאו אוטומטית: {', '.join(metadata_result['fields'])}"
await progress("completed", 100, msg)
return {
"status": "completed",
@@ -206,6 +218,7 @@ async def ingest_precedent(
"halachot": halacha_result.get("stored", 0),
"halachot_extracted_raw": halacha_result.get("extracted", 0),
"halachot_verified": halacha_result.get("verified", 0),
"metadata_filled": metadata_result.get("fields", []),
"pages": page_count,
}
@@ -239,6 +252,36 @@ async def reextract_halachot(
return result
async def reextract_metadata(
    case_law_id: UUID | str,
    progress: ProgressCb | None = None,
) -> dict:
    """Run metadata extraction again for a precedent that is already stored.

    The actual extraction and merge are delegated to
    ``precedent_metadata_extractor.extract_and_apply``; this function adds
    the eligibility check and progress reporting around it.

    Args:
        case_law_id: Row id, as a ``UUID`` or its string form.
        progress: Optional async progress callback; a no-op is used when
            omitted.

    Returns:
        The dict returned by ``extract_and_apply``, unchanged.

    Raises:
        ValueError: If the row does not exist or its ``source_kind`` is not
            ``"external_upload"``.
    """
    report = progress if progress else _noop_progress
    row_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id

    record = await db.get_case_law(row_id)
    if not (record and record.get("source_kind") == "external_upload"):
        raise ValueError("precedent not found or not chair-uploaded")

    await report("extracting_metadata", 40, "מחלץ מטא-דאטה (תקציר, תגיות)")
    result = await precedent_metadata_extractor.extract_and_apply(row_id)

    # Summarise which fields were filled for the final progress message.
    fields = result.get("fields") or []
    if fields:
        msg = f"מולאו {len(fields)} שדות: {', '.join(fields)}"
    else:
        msg = "לא נמצא מה למלא (כל השדות מאוכלסים או לא ניתן לחלץ)"
    await report("completed", 100, msg)
    return result
async def delete_precedent(case_law_id: UUID | str) -> bool:
"""Delete a precedent and cascade chunks + halachot."""
if isinstance(case_law_id, str):

View File

@@ -0,0 +1,216 @@
"""Auto-extract precedent metadata from a freshly-uploaded ruling.
Runs after chunking. Reads the precedent's full_text and asks Claude to
fill in the metadata fields that an upload form usually leaves empty:
short case_name, summary, headnote, key_quote, subject_tags,
appeal_subtype.
Caller policy: only empty user-supplied fields are filled. Anything the
chair already typed in the upload form is preserved. This is enforced
in ``apply_to_record``.
"""
from __future__ import annotations
import logging
from uuid import UUID
from legal_mcp.config import parse_llm_json
from legal_mcp.services import claude_session, db
logger = logging.getLogger(__name__)
# The prompt is kept short: we send only the first 12K chars of the ruling
# (header + opening of the discussion, enough for naming + summary) plus
# the last 6K chars (the conclusion). The middle of the discussion is
# omitted; subject tags are derived from the same head + tail window.
_HEAD_CHARS = 12_000
_TAIL_CHARS = 6_000
METADATA_EXTRACTION_PROMPT = """אתה מסייע משפטי בכיר. קרא את פסק הדין/ההחלטה הבא וחלץ ממנו מטא-דאטה לקטלוג הקורפוס.
המטרה: למלא שדות בטופס העלאה שהמשתמש הזין באופן חלקי. **אל תמציא** — אם המידע לא מופיע בטקסט, השאר ריק (מחרוזת ריקה / מערך ריק).
## פלט נדרש
החזר JSON אחד (object — לא array) בפורמט הבא, ללא markdown וללא הסברים:
{
"case_name_short": "שם קצר ל-3-6 מילים (למשל 'אהרון ברק' או 'ב. קרן-נכסים'). אל תכלול מספר תיק. שם המבקש/העורר העיקרי. אם זו החלטה מאוחדת — שם הצד המוביל.",
"appeal_subtype": "תת-סוג ספציפי בתוך תחום המשפט (למשל 'תכנית רחביה', 'מימוש במכר', 'תמ\\"א 38', 'שימוש חורג', 'סופיות ההחלטה'). מילה אחת או צירוף קצר.",
"summary": "תקציר עניני 2-3 משפטים: מה הייתה השאלה, מה הוכרע. בלי שיפוט.",
"headnote": "headnote בסגנון נבו: 1-2 משפטים שמסכמים את העיקרון שנקבע/יושם בפסק. למשל 'תכנית רחביה — היטל השבחה במימוש במכר — אין לחייב כשהזכויות צפות'.",
"key_quote": "ציטוט מילולי בודד, 30-100 מילים, שמייצג את לב הפסק. חייב להופיע מילה במילה בטקסט. אם אין ציטוט מתאים — מחרוזת ריקה.",
"subject_tags": ["תגיות", "נושא", "בעברית"]
}
## כללי איכות
1. **case_name_short** — שם בולט וקצר. בלי 'נ\\'' / 'נגד' / מספרי תיק.
2. **appeal_subtype** — אופציונלי. אם הסוגיה רחבה ולא מסווגת — השאר ריק.
3. **summary** — תיאור ניטרלי, גוף שלישי.
4. **headnote** — לא מצטטים, מסכמים. סגנון נבו: ביטוי קצר אחד.
5. **key_quote** — חייב להיות הדבקה מילולית מהקלט. אם אין ציטוט בולט — השאר ריק.
6. **subject_tags** — 3-7 תגיות בעברית, snake_case (חניה, קווי_בניין, שיקול_דעת, פגם_פרוצדורלי, סמכות, מועדים, פגיעה_במקרקעין, ירידת_ערך, תכנית_רחביה, מימוש_במכר, וכד'). שייך לתחום של ועדת ערר תכנון ובניה.
## הקלט
{context}
--- תחילת הטקסט ---
{text_window}
--- סוף הטקסט ---
"""
def _build_text_window(full_text: str) -> str:
"""Return the head + tail of the ruling, with a marker if truncated.
Most rulings have the parties/subject in the head and the conclusion
in the tail; the middle is the discussion which is captured via the
halacha extractor independently. Sending head+tail keeps the prompt
cheap while preserving naming and conclusion context.
"""
if len(full_text) <= _HEAD_CHARS + _TAIL_CHARS:
return full_text
return (
full_text[:_HEAD_CHARS]
+ "\n\n[... חלק האמצע הושמט עקב אורך — ראה את החלק האחרון של הפסק להלן ...]\n\n"
+ full_text[-_TAIL_CHARS:]
)
async def extract_metadata(case_law_id: UUID | str) -> dict:
    """Ask the LLM for suggested metadata for one precedent.

    Loads the ``case_law`` row, builds a prompt from its citation, court,
    date, practice area and a head+tail window of ``full_text``, and sends
    it through ``claude_session.query_json``.

    Does NOT write to the DB — caller decides what to merge.

    Args:
        case_law_id: Row id, as a ``UUID`` or its string form.

    Returns:
        A dict holding whichever of ``case_name_short``, ``appeal_subtype``,
        ``summary``, ``headnote``, ``key_quote`` (stripped strings) and
        ``subject_tags`` (list of non-empty stripped strings) came back
        with the expected type. An empty dict when the row is missing, has
        no text, the query fails, or the reply is not a JSON object.

    Raises:
        ValueError: If ``case_law_id`` is a string that is not a valid UUID.
    """
    if isinstance(case_law_id, str):
        case_law_id = UUID(case_law_id)
    record = await db.get_case_law(case_law_id)
    if not record:
        return {}

    full_text = (record.get("full_text") or "").strip()
    if not full_text:
        return {}

    citation = record.get("case_number") or ""
    court = record.get("court") or ""
    date_str = str(record.get("date") or "")
    practice_area = record.get("practice_area") or ""
    context = (
        f"מראה מקום: {citation}\n"
        f"ערכאה: {court}\n"
        f"תאריך: {date_str}\n"
        f"תחום: {practice_area}"
    )

    # The template embeds a literal JSON example whose braces are not
    # escaped, so str.format() would treat the example's opening "{" as a
    # replacement field and raise KeyError. Splice the two placeholders in
    # by position instead; this also leaves any braces inside the ruling
    # text untouched.
    before_context, _, remainder = METADATA_EXTRACTION_PROMPT.partition("{context}")
    before_text, _, after_text = remainder.partition("{text_window}")
    prompt = "".join((
        before_context,
        context,
        before_text,
        _build_text_window(full_text),
        after_text,
    ))

    try:
        result = await claude_session.query_json(prompt)
    except Exception as e:
        # Best-effort stage: a failed LLM call must not break the caller.
        logger.warning("precedent_metadata_extractor: query failed: %s", e)
        return {}

    if not isinstance(result, dict):
        logger.warning(
            "precedent_metadata_extractor: expected dict, got %s",
            type(result).__name__,
        )
        return {}

    # Normalize keys / types: keep only string values for the text fields.
    out: dict = {}
    for key in (
        "case_name_short",
        "appeal_subtype",
        "summary",
        "headnote",
        "key_quote",
    ):
        value = result.get(key)
        if isinstance(value, str):
            out[key] = value.strip()

    tags = result.get("subject_tags") or []
    if isinstance(tags, list):
        stripped = (str(t).strip() for t in tags)
        out["subject_tags"] = [tag for tag in stripped if tag]
    return out
async def apply_to_record(
    case_law_id: UUID | str,
    suggested: dict,
) -> dict:
    """Write suggested metadata into the case_law row, touching ONLY empty fields.

    A string field counts as empty when it is missing, ``None`` or
    whitespace-only; ``subject_tags`` counts as empty when it is missing or
    an empty list. A suggestion that is itself missing or empty is skipped.

    ``case_name`` is the one exception: when the stored ``case_name`` is
    identical to ``case_number`` (the upload form sent the long citation
    into both fields) it is treated as empty and replaced by
    ``suggested["case_name_short"]``.

    Args:
        case_law_id: Row id, as a ``UUID`` or its string form.
        suggested: Suggestions keyed as returned by ``extract_metadata``.

    Returns:
        ``{"updated": bool, "fields": [...]}`` where ``fields`` lists the
        column names that were written (empty when nothing changed or the
        row does not exist).
    """
    row_id = UUID(case_law_id) if isinstance(case_law_id, str) else case_law_id
    record = await db.get_case_law(row_id)
    if not record:
        return {"updated": False, "fields": []}

    def _text(source: dict, key: str) -> str:
        # Missing / None values collapse to "" before stripping.
        return (source.get(key) or "").strip()

    updates: dict = {}

    # case_name: fill when blank or when it merely duplicates case_number.
    current_name = _text(record, "case_name")
    current_number = _text(record, "case_number")
    proposed_name = _text(suggested, "case_name_short")
    if proposed_name and current_name in ("", current_number):
        updates["case_name"] = proposed_name

    # Plain string fields: only consult the suggestion when the row is blank.
    for column in ("appeal_subtype", "summary", "headnote", "key_quote"):
        if _text(record, column):
            continue
        candidate = _text(suggested, column)
        if candidate:
            updates[column] = candidate

    if not (record.get("subject_tags") or []):
        proposed_tags = suggested.get("subject_tags") or []
        if proposed_tags:
            updates["subject_tags"] = proposed_tags

    if updates:
        await db.update_case_law(row_id, **updates)
        return {"updated": True, "fields": list(updates)}
    return {"updated": False, "fields": []}
async def extract_and_apply(case_law_id: UUID | str) -> dict:
    """Extract suggestions, merge them into the row, and report the outcome.

    Returns:
        ``{"status": "no_metadata", "fields": []}`` when extraction yielded
        nothing; otherwise a dict with ``status`` (``"completed"`` if any
        field was written, else ``"no_changes"``), the written ``fields``,
        and the raw ``suggested`` dict.
    """
    suggested = await extract_metadata(case_law_id)
    if suggested:
        merge = await apply_to_record(case_law_id, suggested)
        status = "completed" if merge["updated"] else "no_changes"
        return {
            "status": status,
            "fields": merge["fields"],
            "suggested": suggested,
        }
    return {"status": "no_metadata", "fields": []}

View File

@@ -139,6 +139,19 @@ async def precedent_extract_halachot(case_law_id: str) -> str:
return _ok(result)
async def precedent_extract_metadata(case_law_id: str) -> str:
    """Extract metadata from the ruling text, filling only empty fields.

    Covers short case_name, summary, headnote, key_quote, subject_tags and
    appeal_subtype; values that were already entered are not overwritten.

    Args:
        case_law_id: The precedent id as a UUID string.

    Returns:
        The ``_ok(...)`` payload wrapping the extraction result, or an
        ``_err(...)`` payload when the id is not a valid UUID or the
        extraction raises.
    """
    try:
        parsed_id = UUID(case_law_id)
    except ValueError:
        return _err("case_law_id לא תקין")

    try:
        outcome = await precedent_library.reextract_metadata(parsed_id)
    except Exception as exc:
        # Tool boundary: report the failure as an error payload.
        return _err(str(exc))
    else:
        return _ok(outcome)
async def search_precedent_library(
query: str,
practice_area: str = "",

View File

@@ -1,7 +1,7 @@
"use client";
import { useState } from "react";
import { Trash2, Plus, RefreshCw } from "lucide-react";
import { Trash2, Plus, RefreshCw, Pencil } from "lucide-react";
import { toast } from "sonner";
import {
Table, TableBody, TableCell, TableHead, TableHeader, TableRow,
@@ -22,6 +22,7 @@ import {
} from "@/lib/api/precedent-library";
import { PRACTICE_AREAS, PRECEDENT_LEVELS, practiceAreaShort } from "./practice-area";
import { PrecedentUploadSheet } from "./precedent-upload-sheet";
import { PrecedentEditSheet } from "./precedent-edit-sheet";
function formatDate(iso: string | null) {
if (!iso) return "—";
@@ -55,7 +56,12 @@ function StatusPill({ p }: { p: Precedent }) {
);
}
function PrecedentRow({ p }: { p: Precedent }) {
function PrecedentRow({
p, onEdit,
}: {
p: Precedent;
onEdit: (id: string) => void;
}) {
const del = useDeletePrecedent();
const reExtract = useReExtractHalachot();
@@ -105,6 +111,14 @@ function PrecedentRow({ p }: { p: Precedent }) {
</TableCell>
<TableCell className="text-end">
<div className="flex items-center gap-1 justify-end">
<Button
variant="ghost" size="sm" onClick={() => onEdit(p.id)}
aria-label={`ערוך את ${p.case_number}`}
title="ערוך פרטים"
className="text-ink-muted hover:text-navy"
>
<Pencil className="w-4 h-4" />
</Button>
<Button
variant="ghost" size="sm" onClick={onReExtract}
disabled={reExtract.isPending}
@@ -133,6 +147,7 @@ export function LibraryListPanel() {
const [precedentLevel, setPrecedentLevel] = useState("");
const [search, setSearch] = useState("");
const [uploadOpen, setUploadOpen] = useState(false);
const [editingId, setEditingId] = useState<string | null>(null);
const { data, isPending, error } = usePrecedents({
practiceArea: practiceArea || undefined,
@@ -222,7 +237,9 @@ export function LibraryListPanel() {
</TableCell>
</TableRow>
) : (
data.items.map((p) => <PrecedentRow key={p.id} p={p} />)
data.items.map((p) => (
<PrecedentRow key={p.id} p={p} onEdit={setEditingId} />
))
)}
</TableBody>
</Table>
@@ -230,6 +247,10 @@ export function LibraryListPanel() {
)}
<PrecedentUploadSheet open={uploadOpen} onOpenChange={setUploadOpen} />
<PrecedentEditSheet
caseLawId={editingId}
onOpenChange={(open) => { if (!open) setEditingId(null); }}
/>
</div>
);
}

View File

@@ -0,0 +1,309 @@
"use client";
import { useEffect, useState } from "react";
import { Save, Sparkles, Loader2 } from "lucide-react";
import { toast } from "sonner";
import {
Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription,
} from "@/components/ui/sheet";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Textarea } from "@/components/ui/textarea";
import { Skeleton } from "@/components/ui/skeleton";
import {
Select, SelectContent, SelectItem, SelectTrigger, SelectValue,
} from "@/components/ui/select";
import {
usePrecedent,
useUpdatePrecedent,
useReExtractMetadata,
type PracticeArea,
type SourceType,
} from "@/lib/api/precedent-library";
import { useProgress } from "@/lib/api/documents";
import {
PRACTICE_AREAS, PRECEDENT_LEVELS, SOURCE_TYPES,
} from "./practice-area";
/* Props for PrecedentEditSheet. */
type Props = {
// Id of the precedent being edited; the sheet is open whenever this is
// non-null.
caseLawId: string | null;
// Open-state callback; within this component it is only ever called with
// `false` (after a successful save or when the sheet is dismissed).
onOpenChange: (open: boolean) => void;
};
/* Local form state for the edit sheet. Pulled fresh from
 * /api/precedent-library/{id} each time the sheet opens so the form
 * reflects any auto-fill that happened in the background.
 *
 * Every value is kept in the shape the inputs need, not the API shape:
 * - `citation` mirrors the record's case_number; it is shown read-only and
 *   is not included in the PATCH built on submit.
 * - `decision_date` holds the first 10 characters of the record's date.
 * - `subject_tags` is a single comma-separated string; it is split back
 *   into an array on submit. */
type FormState = {
citation: string;
case_name: string;
court: string;
decision_date: string;
practice_area: PracticeArea;
appeal_subtype: string;
source_type: SourceType;
precedent_level: string;
is_binding: boolean;
subject_tags: string;
summary: string;
headnote: string;
key_quote: string;
};
/* Placeholder form state used as the initial useState value; it is
 * replaced by the record's values once the record has loaded. */
const EMPTY: FormState = {
citation: "", case_name: "", court: "", decision_date: "",
practice_area: "", appeal_subtype: "", source_type: "",
precedent_level: "", is_binding: true, subject_tags: "",
summary: "", headnote: "", key_quote: "",
};
export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) {
const open = caseLawId !== null;
const { data: record, isPending } = usePrecedent(caseLawId);
const update = useUpdatePrecedent();
const reextractMeta = useReExtractMetadata();
const [form, setForm] = useState<FormState>(EMPTY);
const [metadataTaskId, setMetadataTaskId] = useState<string | null>(null);
const metadataProgress = useProgress(metadataTaskId);
// Hydrate form when the record loads.
useEffect(() => {
if (!record) return;
// eslint-disable-next-line react-hooks/set-state-in-effect
setForm({
citation: record.case_number || "",
case_name: record.case_name || "",
court: record.court || "",
decision_date: record.date ? record.date.slice(0, 10) : "",
practice_area: (record.practice_area || "") as PracticeArea,
appeal_subtype: record.appeal_subtype || "",
source_type: (record.source_type || "") as SourceType,
precedent_level: record.precedent_level || "",
is_binding: record.is_binding ?? true,
subject_tags: (record.subject_tags || []).join(", "),
summary: record.summary || "",
headnote: record.headnote || "",
key_quote: (record as { key_quote?: string }).key_quote || "",
});
}, [record]);
// Auto-close metadata progress on completion + refresh form
useEffect(() => {
if (metadataProgress?.status === "completed") {
toast.success("חילוץ מטא-דאטה הסתיים — השדות עודכנו");
setMetadataTaskId(null);
} else if (metadataProgress?.status === "failed") {
toast.error(`חילוץ מטא-דאטה נכשל: ${metadataProgress.error || ""}`);
setMetadataTaskId(null);
}
}, [metadataProgress]);
const onSubmit = async (e: React.FormEvent) => {
e.preventDefault();
if (!caseLawId) return;
try {
const patch: Record<string, unknown> = {
case_name: form.case_name.trim(),
court: form.court.trim(),
practice_area: form.practice_area || undefined,
appeal_subtype: form.appeal_subtype.trim(),
source_type: form.source_type || undefined,
precedent_level: form.precedent_level || undefined,
is_binding: form.is_binding,
subject_tags: form.subject_tags
.split(",").map((t) => t.trim()).filter(Boolean),
summary: form.summary.trim(),
headnote: form.headnote.trim(),
key_quote: form.key_quote.trim(),
};
if (form.decision_date) patch.decision_date = form.decision_date;
// citation (case_number) is the unique key; we don't allow editing it
// here to avoid orphaning halachot. To rename, delete + re-upload.
await update.mutateAsync({ id: caseLawId, patch });
toast.success("נשמר");
onOpenChange(false);
} catch (err) {
toast.error(err instanceof Error ? err.message : "שגיאה");
}
};
const onTriggerMetadata = async () => {
if (!caseLawId) return;
try {
const res = await reextractMeta.mutateAsync(caseLawId);
setMetadataTaskId(res.task_id);
toast.message("מחלץ מטא-דאטה ברקע…");
} catch (err) {
toast.error(err instanceof Error ? err.message : "שגיאה");
}
};
const isMetaRunning = metadataTaskId !== null
&& metadataProgress?.status !== "completed"
&& metadataProgress?.status !== "failed";
return (
<Sheet open={open} onOpenChange={(o) => { if (!o) onOpenChange(false); }}>
<SheetContent side="left" className="w-full sm:max-w-2xl overflow-y-auto" dir="rtl">
<SheetHeader>
<SheetTitle className="text-navy">עריכת פרטי פסיקה</SheetTitle>
<SheetDescription className="text-ink-muted">
כל השדות ניתנים לעריכה חוץ ממראה המקום (מזהה ייחודי).
כפתור &quot;חלץ מטא-דאטה אוטומטית&quot; מנתח את הטקסט וממלא רק שדות ריקים.
</SheetDescription>
</SheetHeader>
{isPending || !record ? (
<div className="px-6 pb-6 mt-4 space-y-3">
{[...Array(6)].map((_, i) => <Skeleton key={i} className="h-10 w-full" />)}
</div>
) : (
<form onSubmit={onSubmit} className="px-6 pb-6 space-y-4 mt-4">
<div className="rounded-lg border border-rule bg-rule-soft/40 p-3 flex items-start gap-3">
<div className="flex-1">
<div className="text-[0.78rem] text-ink-muted">מראה מקום (לא ניתן לעריכה)</div>
<div className="text-navy font-mono text-sm break-all" dir="ltr">
{record.case_number}
</div>
</div>
<Button
type="button" size="sm" variant="outline"
onClick={onTriggerMetadata}
disabled={isMetaRunning || reextractMeta.isPending}
className="shrink-0"
>
{isMetaRunning ? (
<Loader2 className="w-3.5 h-3.5 me-1 animate-spin" />
) : (
<Sparkles className="w-3.5 h-3.5 me-1" />
)}
חלץ מטא-דאטה אוטומטית
</Button>
</div>
{isMetaRunning && (metadataProgress as { step?: string } | null)?.step && (
<div className="text-[0.78rem] text-ink-muted">
{(metadataProgress as { step?: string }).step}
</div>
)}
<div className="grid grid-cols-2 gap-3">
<div className="space-y-1">
<Label htmlFor="case-name">שם קצר</Label>
<Input id="case-name" value={form.case_name}
onChange={(e) => setForm({ ...form, case_name: e.target.value })}
placeholder="ערר 403/17 / אהרון ברק" />
</div>
<div className="space-y-1">
<Label htmlFor="court">ערכאה</Label>
<Input id="court" value={form.court}
onChange={(e) => setForm({ ...form, court: e.target.value })} />
</div>
<div className="space-y-1">
<Label htmlFor="date">תאריך</Label>
<Input id="date" type="date" value={form.decision_date}
onChange={(e) => setForm({ ...form, decision_date: e.target.value })} />
</div>
<div className="space-y-1">
<Label htmlFor="appeal-subtype">תת-סוג</Label>
<Input id="appeal-subtype" value={form.appeal_subtype}
onChange={(e) => setForm({ ...form, appeal_subtype: e.target.value })}
placeholder="תכנית רחביה / סופיות ההחלטה" />
</div>
</div>
<div className="space-y-1">
<Label>תחום</Label>
<div className="flex gap-4 flex-wrap">
{PRACTICE_AREAS.map((a) => (
<label key={a.value} className="flex items-center gap-2 cursor-pointer">
<input type="radio" name="practice_area" value={a.value}
checked={form.practice_area === a.value}
onChange={() => setForm({ ...form, practice_area: a.value as PracticeArea })} />
<span className="text-sm">{a.label}</span>
</label>
))}
</div>
</div>
<div className="grid grid-cols-2 gap-3">
<div className="space-y-1">
<Label htmlFor="source-type">סוג מקור</Label>
<Select value={form.source_type || "_none"}
onValueChange={(v) => setForm({ ...form, source_type: v === "_none" ? "" : v as SourceType })}>
<SelectTrigger><SelectValue /></SelectTrigger>
<SelectContent>
<SelectItem value="_none"></SelectItem>
{SOURCE_TYPES.map((s) => (
<SelectItem key={s.value} value={s.value}>{s.label}</SelectItem>
))}
</SelectContent>
</Select>
</div>
<div className="space-y-1">
<Label htmlFor="precedent-level">רמת תקדים</Label>
<Select value={form.precedent_level || "_none"}
onValueChange={(v) => setForm({ ...form, precedent_level: v === "_none" ? "" : v })}>
<SelectTrigger><SelectValue /></SelectTrigger>
<SelectContent>
<SelectItem value="_none"></SelectItem>
{PRECEDENT_LEVELS.map((l) => (
<SelectItem key={l.value} value={l.value}>{l.label}</SelectItem>
))}
</SelectContent>
</Select>
</div>
</div>
<div className="space-y-1">
<Label htmlFor="tags">תגיות נושא (מופרדות בפסיקים)</Label>
<Input id="tags" value={form.subject_tags}
onChange={(e) => setForm({ ...form, subject_tags: e.target.value })}
placeholder="חניה, קווי בניין, שיקול דעת" />
</div>
<div className="space-y-1">
<Label htmlFor="summary">תקציר (2-3 משפטים)</Label>
<Textarea id="summary" value={form.summary} rows={3} dir="rtl"
onChange={(e) => setForm({ ...form, summary: e.target.value })} />
</div>
<div className="space-y-1">
<Label htmlFor="headnote">Headnote (משפט-שניים)</Label>
<Textarea id="headnote" value={form.headnote} rows={2} dir="rtl"
onChange={(e) => setForm({ ...form, headnote: e.target.value })} />
</div>
<div className="space-y-1">
<Label htmlFor="key-quote">ציטוט מרכזי</Label>
<Textarea id="key-quote" value={form.key_quote} rows={3} dir="rtl"
onChange={(e) => setForm({ ...form, key_quote: e.target.value })} />
</div>
<label className="flex items-center gap-2 cursor-pointer">
<input type="checkbox" checked={form.is_binding}
onChange={(e) => setForm({ ...form, is_binding: e.target.checked })} />
<span className="text-sm">הלכה מחייבת (binding)</span>
<span className="text-[0.7rem] text-ink-muted">
בדרך כלל רק עליון/מנהלי. ועדות ערר אחרות = לא מחייב.
</span>
</label>
<div className="flex gap-2 justify-end pt-2 border-t border-rule-soft">
<Button type="button" variant="ghost"
onClick={() => onOpenChange(false)} disabled={update.isPending}>
ביטול
</Button>
<Button type="submit" disabled={update.isPending}
className="bg-navy text-parchment hover:bg-navy-soft">
<Save className="w-4 h-4 me-1" />
שמור
</Button>
</div>
</form>
)}
</SheetContent>
</Sheet>
);
}

View File

@@ -350,6 +350,21 @@ export function useReExtractHalachot() {
});
}
/**
 * Mutation hook that asks the backend to re-run metadata extraction for one
 * precedent-library record.
 *
 * The mutation variable is the record id. It POSTs to the record's
 * `extract-metadata` endpoint and resolves with the `{ task_id }` payload.
 * Once the request succeeds, the cached detail query for that id and the
 * queries under `libraryKeys.all` are invalidated.
 *
 * NOTE: the endpoint replies as soon as the background task is queued, so
 * this invalidation runs before the extraction itself has finished.
 */
export function useReExtractMetadata() {
  const queryClient = useQueryClient();

  const startExtraction = (id: string) => {
    const url = `/api/precedent-library/${encodeURIComponent(id)}/extract-metadata`;
    return apiRequest<{ task_id: string }>(url, { method: "POST" });
  };

  return useMutation({
    mutationFn: startExtraction,
    onSuccess: (_response, recordId) => {
      queryClient.invalidateQueries({ queryKey: libraryKeys.detail(recordId) });
      queryClient.invalidateQueries({ queryKey: libraryKeys.all });
    },
  });
}
export function useHalachotPending(limit = 200) {
return useQuery({
queryKey: libraryKeys.halachotPending(),

View File

@@ -3779,6 +3779,37 @@ async def precedent_library_reextract(case_law_id: str):
return {"task_id": task_id}
# Strong references to in-flight metadata re-extraction tasks. The event loop
# only keeps weak references to tasks, so a fire-and-forget task with no other
# reference may be garbage-collected before it finishes.
_metadata_extract_tasks = set()


@app.post("/api/precedent-library/{case_law_id}/extract-metadata")
async def precedent_library_extract_metadata(case_law_id: str):
    """Re-run metadata extraction in background. Fills empty fields only.

    Validates the id, looks the record up, registers a "queued" progress
    entry under a fresh task id, then schedules the extraction as a
    background task and returns immediately.

    Args:
        case_law_id: The record id as a UUID string (path parameter).

    Returns:
        ``{"task_id": <str>}`` — the id under which progress is published.

    Raises:
        HTTPException: 400 if ``case_law_id`` is not a valid UUID,
            404 if no record exists for it.
    """
    try:
        cid = UUID(case_law_id)
    except ValueError:
        # `from None`: the client error should not carry the internal
        # ValueError as a chained cause.
        raise HTTPException(400, "case_law_id לא תקין") from None

    record = await db.get_case_law(cid)
    if not record:
        raise HTTPException(404, "פסיקה לא נמצאה")

    task_id = str(uuid4())
    # Prefer the case number as the progress label; fall back to the raw id.
    label = record.get("case_number") or case_law_id
    await _progress.set(task_id, {
        "status": "queued", "filename": label, "stage": "queued", "percent": 0,
    })
    publish = _make_progress_publisher(task_id, label)

    async def _run():
        # Top-level boundary of the background job: log any failure and
        # record it in the progress entry so it is not lost with the task.
        try:
            await plib_service.reextract_metadata(cid, progress=publish)
        except Exception as e:
            logger.exception("re-extract metadata failed")
            await _progress.set(task_id, {
                "status": "failed", "error": str(e), "filename": label,
            })

    # Hold a reference until the task is done so it cannot disappear
    # mid-execution; the done-callback drops it again to avoid a leak.
    task = asyncio.create_task(_run())
    _metadata_extract_tasks.add(task)
    task.add_done_callback(_metadata_extract_tasks.discard)
    return {"task_id": task_id}
@app.get("/api/halachot")
async def halachot_list(
case_law_id: str = "",