From 73a79ea7e82dec6a98bba134a3b449e50411db8b Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 3 May 2026 10:19:35 +0000 Subject: [PATCH] feat(precedents): metadata auto-fill, edit sheet, persuasive extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three improvements to the precedent library based on usage feedback: 1. Auto-fill metadata at upload time. New service precedent_metadata_extractor reads the ruling's full_text and suggests case_name (short), summary, headnote, key_quote, subject_tags, appeal_subtype. The merge policy fills only empty fields, preserving everything the chair typed in the upload form. Wired into the ingest pipeline; also exposed as a re-run endpoint POST /api/precedent-library/{id}/extract-metadata for existing records. 2. Edit sheet in the UI. Pencil icon on each library row opens a pre-populated form covering every field. A Sparkles button on the sheet runs the metadata extractor on demand and refreshes the form. The case_number is read-only because halachot are FK'd to it; renaming requires delete + re-upload. 3. Halacha extractor branches on is_binding. Sources marked binding (Supreme/Administrative) keep the strict halacha prompt. Non-binding sources (other appeals committees, district courts on planning matters) get a different prompt that extracts applications, interpretive principles, and persuasive conclusions — labeled with new rule_types 'application' and 'persuasive'. The fallback also widens chunk selection: if the chunker labeled nothing as legal_analysis/ruling/conclusion, we now run on all chunks rather than returning zero halachot for a usable ruling. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .taskmaster/tasks/tasks.json | 72 +++- mcp-server/src/legal_mcp/server.py | 6 + .../legal_mcp/services/halacha_extractor.py | 120 ++++++- .../legal_mcp/services/precedent_library.py | 53 ++- .../services/precedent_metadata_extractor.py | 216 ++++++++++++ .../src/legal_mcp/tools/precedent_library.py | 13 + .../precedents/library-list-panel.tsx | 27 +- .../precedents/precedent-edit-sheet.tsx | 309 ++++++++++++++++++ web-ui/src/lib/api/precedent-library.ts | 15 + web/app.py | 31 ++ 10 files changed, 841 insertions(+), 21 deletions(-) create mode 100644 mcp-server/src/legal_mcp/services/precedent_metadata_extractor.py create mode 100644 web-ui/src/components/precedents/precedent-edit-sheet.tsx diff --git a/.taskmaster/tasks/tasks.json b/.taskmaster/tasks/tasks.json index c150ffd..2f4c4ed 100644 --- a/.taskmaster/tasks/tasks.json +++ b/.taskmaster/tasks/tasks.json @@ -1089,13 +1089,79 @@ "priority": "medium", "subtasks": [], "updatedAt": "2026-05-03T08:36:24.711Z" + }, + { + "id": "9", + "title": "Service: precedent_metadata_extractor.py", + "description": "LLM-based extractor that auto-fills empty metadata fields after upload: short case_name (e.g. 'אהרון ברק' from long citation), summary (2-3 sentences), headnote, key_quote, subject_tags array, appeal_subtype. Reuses claude_session.query_json. Returns dict; caller decides which empty fields to merge (never overrides user values).", + "details": "", + "testStrategy": "", + "status": "done", + "dependencies": [], + "priority": "high", + "subtasks": [], + "updatedAt": "2026-05-03T10:19:15.105Z" + }, + { + "id": "10", + "title": "Halacha extractor: dual mode (binding vs persuasive)", + "description": "Update halacha_extractor.py prompt to branch on is_binding: binding=true → strict halacha extraction (current). binding=false → extract reasoning principles, applications of established halachot, persuasive conclusions. 
New rule_types: 'application' (applying known rule to facts), 'persuasive' (committee's reasoning citable as authority). Schema unchanged (rule_type already TEXT).", + "details": "", + "testStrategy": "", + "status": "done", + "dependencies": [], + "priority": "high", + "subtasks": [], + "updatedAt": "2026-05-03T10:19:15.117Z" + }, + { + "id": "11", + "title": "Ingest pipeline: add metadata extraction stage", + "description": "In services/precedent_library.py:ingest_precedent, after halacha extraction, run metadata_extractor and PATCH the case_law row with auto-filled fields (only those left empty by user). Publish progress 'extracting_metadata'.", + "details": "", + "testStrategy": "", + "status": "done", + "dependencies": [ + "9" + ], + "priority": "high", + "subtasks": [], + "updatedAt": "2026-05-03T10:19:15.128Z" + }, + { + "id": "12", + "title": "UI: precedent edit sheet", + "description": "Add edit button to library-list-panel rows that opens a Sheet with all editable fields (case_name, citation, court, date, practice_area, appeal_subtype, subject_tags, summary, headnote, key_quote, source_type, precedent_level, is_binding). Pre-populated from current values. Submit calls PATCH /api/precedent-library/{id} via useUpdatePrecedent. 
After save, invalidate library list query.", + "details": "", + "testStrategy": "", + "status": "done", + "dependencies": [], + "priority": "high", + "subtasks": [], + "updatedAt": "2026-05-03T10:19:15.134Z" + }, + { + "id": "13", + "title": "Test on 403-17: fix metadata + re-extract", + "description": "After deploy: PATCH 403-17 to set case_name='ערר 403/17', then trigger precedent_extract_halachot to test the dual-mode extraction on a non-binding committee decision.", + "details": "", + "testStrategy": "", + "status": "pending", + "dependencies": [ + "9", + "10", + "11", + "12" + ], + "priority": "medium", + "subtasks": [] } ], "metadata": { "version": "1.0.0", - "lastModified": "2026-05-03T08:36:24.711Z", - "taskCount": 8, - "completedCount": 8, + "lastModified": "2026-05-03T10:19:15.134Z", + "taskCount": 13, + "completedCount": 12, "tags": [ "legal-ai" ] diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index f81591c..aaaf101 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -210,6 +210,12 @@ async def precedent_extract_halachot(case_law_id: str) -> str: return await plib.precedent_extract_halachot(case_law_id) +@mcp.tool() +async def precedent_extract_metadata(case_law_id: str) -> str: + """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. 
ממלא רק שדות ריקים.""" + return await plib.precedent_extract_metadata(case_law_id) + + @mcp.tool() async def search_precedent_library( query: str, diff --git a/mcp-server/src/legal_mcp/services/halacha_extractor.py b/mcp-server/src/legal_mcp/services/halacha_extractor.py index e4d09f0..9917615 100644 --- a/mcp-server/src/legal_mcp/services/halacha_extractor.py +++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py @@ -41,7 +41,23 @@ CHUNK_RETRY_ATTEMPTS = 1 EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion") -HALACHA_EXTRACTION_PROMPT = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית. +# Two prompts — choose by source's is_binding flag. +# +# The binding prompt extracts strict halachot (rules a future panel MUST +# follow). It rejects obiter dicta, factual findings, and citations of +# other rulings that the present court only mentioned in passing. +# +# The persuasive prompt is for sources that don't establish binding law +# (most appeals committee decisions, district courts on planning matters, +# etc.). For those, the value is in **how the panel reasoned and applied** +# established law to facts — not in new halachot. The user explicitly +# wants to be able to cite "another committee reached the same conclusion" +# even though it is not binding. +# +# The schema's rule_type field accepts six values: +# binding | interpretive | procedural | obiter | application | persuasive + +HALACHA_EXTRACTION_PROMPT_BINDING = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה (ועדות ערר, היטל השבחה, פיצויים לפי סעיף 197 לחוק התכנון והבניה). תפקידך: לחלץ הלכות מחייבות מתוך פסק דין/החלטה משפטית של ערכאה עליונה (עליון / מנהלי). ## הגדרות מחייבות @@ -94,8 +110,60 @@ HALACHA_EXTRACTION_PROMPT = """אתה משפטן בכיר המתמחה בדינ """ +HALACHA_EXTRACTION_PROMPT_PERSUASIVE = """אתה משפטן בכיר המתמחה בדיני תכנון ובניה. 
תפקידך: לחלץ עקרונות, יישומים ומסקנות מתוך החלטה של ועדת ערר אחרת או של בית משפט שאינו ערכאה עליונה לסוגיה. + +## חשוב — מה לחלץ ומה לא + +המקור הזה **אינו** מקור להלכות מחייבות חדשות (binding rules). הלכות מחייבות מגיעות מהעליון/מנהלי. עם זאת, יש כאן ערך משמעותי שצריך לחלץ — איך הפנל הזה ניתח ויישם את הדין הקיים. כשנכתוב החלטה עתידית, נצטט מהמקור הזה כ"גם ועדת הערר ב-X הגיעה למסקנה דומה" — לא כסמכות מחייבת, אלא כתמיכה משכנעת. + +**יש לחלץ:** +- **יישום של הלכה ידועה** (rule_type=`application`) — הפנל החיל הלכה ידועה (של עליון/מנהלי) על עובדות הנידונות. תצטט את ניסוח הכלל **כפי שהוצג כאן** (לא בהכרח כפי שנקבע במקור) ואת התוצאה. +- **עקרון פרשני שאומץ** (rule_type=`interpretive`) — איך הפנל פירש סעיף חוק / תכנית, באופן שניתן לאמץ. +- **כלל פרוצדורלי** (rule_type=`procedural`) — קביעות בנושאי סמכות, מועדים, הליך. +- **מסקנה מנומקת ומשכנעת** (rule_type=`persuasive`) — מסקנה שלמה של הפנל בסוגיה, עם ההיגיון התומך, ניתנת לציטוט כאסמכתא משכנעת. + +**אין לחלץ:** +- ממצאים עובדתיים ספציפיים לתיק ("העורר לא הוכיח X"). +- ציטוטים מפסקי דין אחרים ללא ניתוח של הפנל. +- אמרות אגב חסרות חשיבות. + +## תחומים אפשריים (practice_areas) — תחומי ועדת הערר בלבד +- rishuy_uvniya — רישוי ובניה (תיקי 1xxx: היתרים, שימוש חורג, תכניות, קווי בניין, גובה, חניה) +- betterment_levy — היטל השבחה (תיקי 8xxx: שומה, מערכות, תכניות המקנות בה, מועד קובע, סופיות ההחלטה) +- compensation_197 — פיצויים לפי ס' 197 (תיקי 9xxx: פגיעה במקרקעין, ירידת ערך, ס' 200/פטור) + +## פלט נדרש +החזר JSON array בלבד, ללא markdown, ללא הסברים: +[ + { + "rule_statement": "ניסוח הכלל / המסקנה / היישום בלשון משפטית מדויקת, 1-3 משפטים.", + "rule_type": "application", + "reasoning_summary": "תמצית ההיגיון של הפנל (1-2 משפטים).", + "supporting_quote": "ציטוט מילולי מדויק מהקלט שתומך בכלל. 
חייב להופיע מילה במילה.", + "page_reference": "פס' 12 / עמ' 8 — ככל שניתן לזהות.", + "practice_areas": ["betterment_levy"], + "subject_tags": ["מועד_קביעת_שומה", "תכנית_רחביה"], + "cites": ["עע\\"מ 3975/22"], + "confidence": 0.85 + } +] + +## כללי איכות +1. **נאמנות מוחלטת לציטוט** — supporting_quote חייב להיות הדבקה מדויקת מהקלט. אם אין ציטוט מתאים — אל תוסיף את ההלכה. +2. **מספר הלכות** — החלטה ארוכה של ועדת ערר יכולה להניב 2-8 פריטים (יישומים + מסקנות). אם אין מה לחלץ — החזר []. +3. **rule_type מדויק** — application = יישום הלכה ידועה. interpretive = פרשנות. procedural = פרוצדורה. persuasive = מסקנה כללית בעלת ערך כאסמכתא. +4. **לא לפצל יתר על המידה** — שני סעיפים זהים מבחינה רעיונית = פריט אחד. +5. **שפה** — עברית משפטית מקצועית, גוף שלישי. +6. **subject_tags** — 2-5 תגיות בעברית, snake_case. +7. **confidence** — 0..1. דייק. +""" + + _VALID_PRACTICE_AREAS = {"rishuy_uvniya", "betterment_levy", "compensation_197"} -_VALID_RULE_TYPES = {"binding", "interpretive", "procedural", "obiter"} +_VALID_RULE_TYPES = { + "binding", "interpretive", "procedural", "obiter", + "application", "persuasive", +} def _normalize_for_comparison(text: str) -> str: @@ -135,10 +203,13 @@ def _verify_quote(supporting_quote: str, full_text: str) -> bool: return False -def _coerce_halacha(raw: dict) -> dict | None: +def _coerce_halacha(raw: dict, is_binding: bool = True) -> dict | None: """Validate and normalize one LLM-returned halacha dict. - Returns ``None`` if the entry is missing required fields. + Returns ``None`` if the entry is missing required fields. ``is_binding`` + only affects the default rule_type when the LLM returned an unknown + value — for binding sources we default to ``binding``, otherwise to + ``persuasive`` (never pretend an appeals committee created halacha). 
""" if not isinstance(raw, dict): return None @@ -147,9 +218,13 @@ def _coerce_halacha(raw: dict) -> dict | None: if not rule_statement or not supporting_quote: return None - rule_type = (raw.get("rule_type") or "binding").strip().lower() + default_rule_type = "binding" if is_binding else "persuasive" + rule_type = (raw.get("rule_type") or default_rule_type).strip().lower() if rule_type not in _VALID_RULE_TYPES: - rule_type = "binding" + rule_type = default_rule_type + # Guard: don't let a non-binding source produce 'binding' rule_type + if not is_binding and rule_type == "binding": + rule_type = "persuasive" practice_areas_raw = raw.get("practice_areas") or [] if isinstance(practice_areas_raw, str): @@ -191,11 +266,21 @@ async def _extract_chunk( chunk_index: int, chunk_total: int, context: str, + is_binding: bool, ) -> list[dict]: - """Run the halacha extractor on one chunk with retry.""" + """Run the halacha extractor on one chunk with retry. + + The prompt branches on ``is_binding`` so that non-binding sources + (other appeals committees, district courts) yield application / + persuasive entries rather than a forced 0-result strict halacha pass. + """ + base_prompt = ( + HALACHA_EXTRACTION_PROMPT_BINDING if is_binding + else HALACHA_EXTRACTION_PROMPT_PERSUASIVE + ) chunk_label = f" (חלק {chunk_index + 1}/{chunk_total})" if chunk_total > 1 else "" prompt = ( - f"{HALACHA_EXTRACTION_PROMPT}\n\n" + f"{base_prompt}\n\n" f"## הקלט\n" f"סוג קטע: {section_type}\n" f"{context}{chunk_label}\n\n" @@ -241,9 +326,24 @@ async def extract(case_law_id: UUID | str) -> dict: if not record: return {"status": "not_found", "extracted": 0, "stored": 0} + is_binding = bool(record.get("is_binding")) + + # Try the targeted sections first (legal_analysis / ruling / conclusion). 
+ # If the chunker labeled everything as 'other' (common when a ruling + # uses non-standard headings or the section markers aren't bracketed + # cleanly), fall back to ALL chunks — better to over-include than to + # silently skip a ruling that has reasoning under an unexpected label. chunks = await db.list_precedent_chunks( case_law_id, section_types=EXTRACTABLE_SECTIONS, ) + if not chunks: + chunks = await db.list_precedent_chunks(case_law_id) + if chunks: + logger.info( + "halacha_extractor: case_law=%s — no targeted sections, " + "falling back to all %d chunks", + case_law_id, len(chunks), + ) if not chunks: await db.set_case_law_halacha_status(case_law_id, "completed") return {"status": "no_chunks", "extracted": 0, "stored": 0} @@ -262,7 +362,7 @@ async def extract(case_law_id: UUID | str) -> dict: async with sem: return await _extract_chunk( chunk_row["content"], chunk_row["section_type"], - idx, len(chunks), context, + idx, len(chunks), context, is_binding, ) chunk_results = await asyncio.gather( @@ -281,7 +381,7 @@ async def extract(case_law_id: UUID | str) -> dict: cleaned: list[dict] = [] for raw in raw_halachot: - coerced = _coerce_halacha(raw) + coerced = _coerce_halacha(raw, is_binding=is_binding) if coerced is None: continue coerced["quote_verified"] = _verify_quote( diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py index 32a035c..f2921f7 100644 --- a/mcp-server/src/legal_mcp/services/precedent_library.py +++ b/mcp-server/src/legal_mcp/services/precedent_library.py @@ -28,6 +28,7 @@ from legal_mcp.services import ( embeddings, extractor, halacha_extractor, + precedent_metadata_extractor, ) logger = logging.getLogger(__name__) @@ -188,16 +189,27 @@ async def ingest_precedent( ] stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts) - await progress("extracting_halachot", 75, "מחלץ הלכות מחייבות") await db.set_case_law_extraction_status(case_law_id, 
"completed") + + await progress("extracting_metadata", 65, "מחלץ מטא-דאטה (תקציר, תגיות)") + try: + metadata_result = await precedent_metadata_extractor.extract_and_apply( + case_law_id, + ) + except Exception as e: + logger.warning("metadata extraction failed (non-fatal): %s", e) + metadata_result = {"status": "failed", "fields": []} + + await progress("extracting_halachot", 80, "מחלץ הלכות / יישומים") halacha_result = await halacha_extractor.extract(case_law_id) - await progress( - "completed", - 100, + msg = ( f"הוכנס לספרייה: {stored_chunks} chunks, " - f"{halacha_result.get('stored', 0)} הלכות ממתינות לאישור", + f"{halacha_result.get('stored', 0)} פריטים ממתינים לאישור" ) + if metadata_result.get("fields"): + msg += f"; מולאו אוטומטית: {', '.join(metadata_result['fields'])}" + await progress("completed", 100, msg) return { "status": "completed", @@ -206,6 +218,7 @@ async def ingest_precedent( "halachot": halacha_result.get("stored", 0), "halachot_extracted_raw": halacha_result.get("extracted", 0), "halachot_verified": halacha_result.get("verified", 0), + "metadata_filled": metadata_result.get("fields", []), "pages": page_count, } @@ -239,6 +252,36 @@ async def reextract_halachot( return result +async def reextract_metadata( + case_law_id: UUID | str, + progress: ProgressCb | None = None, +) -> dict: + """Re-run metadata extraction on an existing precedent. + + Only fills empty fields (subject_tags, summary, headnote, key_quote, + appeal_subtype, and case_name when it equals the citation). User + values are preserved. 
+ """ + progress = progress or _noop_progress + if isinstance(case_law_id, str): + case_law_id = UUID(case_law_id) + + record = await db.get_case_law(case_law_id) + if not record or record.get("source_kind") != "external_upload": + raise ValueError("precedent not found or not chair-uploaded") + + await progress("extracting_metadata", 40, "מחלץ מטא-דאטה (תקציר, תגיות)") + result = await precedent_metadata_extractor.extract_and_apply(case_law_id) + fields = result.get("fields") or [] + msg = ( + f"מולאו {len(fields)} שדות: {', '.join(fields)}" + if fields + else "לא נמצא מה למלא (כל השדות מאוכלסים או לא ניתן לחלץ)" + ) + await progress("completed", 100, msg) + return result + + async def delete_precedent(case_law_id: UUID | str) -> bool: """Delete a precedent and cascade chunks + halachot.""" if isinstance(case_law_id, str): diff --git a/mcp-server/src/legal_mcp/services/precedent_metadata_extractor.py b/mcp-server/src/legal_mcp/services/precedent_metadata_extractor.py new file mode 100644 index 0000000..29f5122 --- /dev/null +++ b/mcp-server/src/legal_mcp/services/precedent_metadata_extractor.py @@ -0,0 +1,216 @@ +"""Auto-extract precedent metadata from a freshly-uploaded ruling. + +Runs after chunking. Reads the precedent's full_text and asks Claude to +fill in the metadata fields that an upload form usually leaves empty: +short case_name, summary, headnote, key_quote, subject_tags, +appeal_subtype. + +Caller policy: only empty user-supplied fields are filled. Anything the +chair already typed in the upload form is preserved. This is enforced +in ``apply_to_record``. +""" + +from __future__ import annotations + +import logging +from uuid import UUID + +from legal_mcp.config import parse_llm_json +from legal_mcp.services import claude_session, db + +logger = logging.getLogger(__name__) + + +# The prompt is short — we only need the first 12K chars of the ruling +# (header + opening of discussion is enough for naming + summary). 
For +# subject tags we sample the discussion section too. +_HEAD_CHARS = 12_000 +_TAIL_CHARS = 6_000 + + +METADATA_EXTRACTION_PROMPT = """אתה מסייע משפטי בכיר. קרא את פסק הדין/ההחלטה הבא וחלץ ממנו מטא-דאטה לקטלוג הקורפוס. + +המטרה: למלא שדות בטופס העלאה שהמשתמש הזין באופן חלקי. **אל תמציא** — אם המידע לא מופיע בטקסט, השאר ריק (מחרוזת ריקה / מערך ריק). + +## פלט נדרש +החזר JSON אחד (object — לא array) בפורמט הבא, ללא markdown וללא הסברים: + +{ + "case_name_short": "שם קצר ל-3-6 מילים (למשל 'אהרון ברק' או 'ב. קרן-נכסים'). אל תכלול מספר תיק. שם המבקש/העורר העיקרי. אם זו החלטה מאוחדת — שם הצד המוביל.", + "appeal_subtype": "תת-סוג ספציפי בתוך תחום המשפט (למשל 'תכנית רחביה', 'מימוש במכר', 'תמ\\"א 38', 'שימוש חורג', 'סופיות ההחלטה'). מילה אחת או צירוף קצר.", + "summary": "תקציר עניני 2-3 משפטים: מה הייתה השאלה, מה הוכרע. בלי שיפוט.", + "headnote": "headnote בסגנון נבו: 1-2 משפטים שמסכמים את העיקרון שנקבע/יושם בפסק. למשל 'תכנית רחביה — היטל השבחה במימוש במכר — אין לחייב כשהזכויות צפות'.", + "key_quote": "ציטוט מילולי בודד, 30-100 מילים, שמייצג את לב הפסק. חייב להופיע מילה במילה בטקסט. אם אין ציטוט מתאים — מחרוזת ריקה.", + "subject_tags": ["תגיות", "נושא", "בעברית"] +} + +## כללי איכות +1. **case_name_short** — שם בולט וקצר. בלי 'נ\\'' / 'נגד' / מספרי תיק. +2. **appeal_subtype** — אופציונלי. אם הסוגיה רחבה ולא מסווגת — השאר ריק. +3. **summary** — תיאור ניטרלי, גוף שלישי. +4. **headnote** — לא מצטטים, מסכמים. סגנון נבו: ביטוי קצר אחד. +5. **key_quote** — חייב להיות הדבקה מילולית מהקלט. אם אין ציטוט בולט — השאר ריק. +6. **subject_tags** — 3-7 תגיות בעברית, snake_case (חניה, קווי_בניין, שיקול_דעת, פגם_פרוצדורלי, סמכות, מועדים, פגיעה_במקרקעין, ירידת_ערך, תכנית_רחביה, מימוש_במכר, וכד'). שייך לתחום של ועדת ערר תכנון ובניה. + +## הקלט +{context} + +--- תחילת הטקסט --- +{text_window} +--- סוף הטקסט --- +""" + + +def _build_text_window(full_text: str) -> str: + """Return the head + tail of the ruling, with a marker if truncated. 
+ + Most rulings have the parties/subject in the head and the conclusion + in the tail; the middle is the discussion which is captured via the + halacha extractor independently. Sending head+tail keeps the prompt + cheap while preserving naming and conclusion context. + """ + if len(full_text) <= _HEAD_CHARS + _TAIL_CHARS: + return full_text + return ( + full_text[:_HEAD_CHARS] + + "\n\n[... חלק האמצע הושמט עקב אורך — ראה את החלק האחרון של הפסק להלן ...]\n\n" + + full_text[-_TAIL_CHARS:] + ) + + +async def extract_metadata(case_law_id: UUID | str) -> dict: + """Run metadata extraction. Returns a dict with the suggested values. + + Does NOT write to the DB — caller decides what to merge. + """ + if isinstance(case_law_id, str): + case_law_id = UUID(case_law_id) + + record = await db.get_case_law(case_law_id) + if not record: + return {} + full_text = (record.get("full_text") or "").strip() + if not full_text: + return {} + + citation = record.get("case_number") or "" + court = record.get("court") or "" + date_str = str(record.get("date") or "") + practice_area = record.get("practice_area") or "" + + context = ( + f"מראה מקום: {citation}\n" + f"ערכאה: {court}\n" + f"תאריך: {date_str}\n" + f"תחום: {practice_area}" + ) + # str.replace, not str.format: the prompt embeds a literal JSON example + # whose unescaped braces str.format would parse as a replacement field + # and fail with KeyError before the LLM is ever queried. + prompt = METADATA_EXTRACTION_PROMPT.replace( + "{context}", context, + ).replace("{text_window}", _build_text_window(full_text)) + + try: + result = await claude_session.query_json(prompt) + except Exception as e: + logger.warning("precedent_metadata_extractor: query failed: %s", e) + return {} + + if not isinstance(result, dict): + logger.warning( + "precedent_metadata_extractor: expected dict, got %s", + type(result).__name__, + ) + return {} + + # Normalize keys / types + out: dict = {} + if isinstance(result.get("case_name_short"), str): + out["case_name_short"] = result["case_name_short"].strip() + if isinstance(result.get("appeal_subtype"), str): + out["appeal_subtype"] = result["appeal_subtype"].strip() + if isinstance(result.get("summary"), str): + out["summary"]
= result["summary"].strip() + if isinstance(result.get("headnote"), str): + out["headnote"] = result["headnote"].strip() + if isinstance(result.get("key_quote"), str): + out["key_quote"] = result["key_quote"].strip() + tags = result.get("subject_tags") or [] + if isinstance(tags, list): + out["subject_tags"] = [str(t).strip() for t in tags if str(t).strip()] + return out + + +async def apply_to_record( + case_law_id: UUID | str, + suggested: dict, +) -> dict: + """Merge suggested metadata into the case_law row, filling ONLY empty fields. + + Empty rules: + - string field == "" → fill from suggested + - list field == [] → fill from suggested + - if suggested key is missing or empty, skip + + case_name has special handling: if the current case_name equals the + case_number (a tell-tale sign of the upload form sending the long + citation into both fields), treat it as empty and overwrite. + """ + if isinstance(case_law_id, str): + case_law_id = UUID(case_law_id) + record = await db.get_case_law(case_law_id) + if not record: + return {"updated": False, "fields": []} + + fields_to_update: dict = {} + + cur_case_name = (record.get("case_name") or "").strip() + cur_case_number = (record.get("case_number") or "").strip() + suggested_case_name = (suggested.get("case_name_short") or "").strip() + if suggested_case_name and ( + not cur_case_name or cur_case_name == cur_case_number + ): + fields_to_update["case_name"] = suggested_case_name + + if not (record.get("appeal_subtype") or "").strip(): + s = (suggested.get("appeal_subtype") or "").strip() + if s: + fields_to_update["appeal_subtype"] = s + + if not (record.get("summary") or "").strip(): + s = (suggested.get("summary") or "").strip() + if s: + fields_to_update["summary"] = s + + if not (record.get("headnote") or "").strip(): + s = (suggested.get("headnote") or "").strip() + if s: + fields_to_update["headnote"] = s + + if not (record.get("key_quote") or "").strip(): + s = (suggested.get("key_quote") or "").strip() + if 
s: + fields_to_update["key_quote"] = s + + cur_tags = record.get("subject_tags") or [] + if not cur_tags: + sug_tags = suggested.get("subject_tags") or [] + if sug_tags: + fields_to_update["subject_tags"] = sug_tags + + if not fields_to_update: + return {"updated": False, "fields": []} + + await db.update_case_law(case_law_id, **fields_to_update) + return {"updated": True, "fields": list(fields_to_update.keys())} + + +async def extract_and_apply(case_law_id: UUID | str) -> dict: + """Convenience wrapper: extract → merge into row → return summary.""" + suggested = await extract_metadata(case_law_id) + if not suggested: + return {"status": "no_metadata", "fields": []} + result = await apply_to_record(case_law_id, suggested) + return { + "status": "completed" if result["updated"] else "no_changes", + "fields": result["fields"], + "suggested": suggested, + } diff --git a/mcp-server/src/legal_mcp/tools/precedent_library.py b/mcp-server/src/legal_mcp/tools/precedent_library.py index d12be7e..e704ef5 100644 --- a/mcp-server/src/legal_mcp/tools/precedent_library.py +++ b/mcp-server/src/legal_mcp/tools/precedent_library.py @@ -139,6 +139,19 @@ async def precedent_extract_halachot(case_law_id: str) -> str: return _ok(result) +async def precedent_extract_metadata(case_law_id: str) -> str: + """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. 
ממלא רק שדות ריקים — לא דורס מה שכבר הוזן.""" + try: + cid = UUID(case_law_id) + except ValueError: + return _err("case_law_id לא תקין") + try: + result = await precedent_library.reextract_metadata(cid) + except Exception as e: + return _err(str(e)) + return _ok(result) + + async def search_precedent_library( query: str, practice_area: str = "", diff --git a/web-ui/src/components/precedents/library-list-panel.tsx b/web-ui/src/components/precedents/library-list-panel.tsx index 55e7c1e..6b17140 100644 --- a/web-ui/src/components/precedents/library-list-panel.tsx +++ b/web-ui/src/components/precedents/library-list-panel.tsx @@ -1,7 +1,7 @@ "use client"; import { useState } from "react"; -import { Trash2, Plus, RefreshCw } from "lucide-react"; +import { Trash2, Plus, RefreshCw, Pencil } from "lucide-react"; import { toast } from "sonner"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow, @@ -22,6 +22,7 @@ import { } from "@/lib/api/precedent-library"; import { PRACTICE_AREAS, PRECEDENT_LEVELS, practiceAreaShort } from "./practice-area"; import { PrecedentUploadSheet } from "./precedent-upload-sheet"; +import { PrecedentEditSheet } from "./precedent-edit-sheet"; function formatDate(iso: string | null) { if (!iso) return "—"; @@ -55,7 +56,12 @@ function StatusPill({ p }: { p: Precedent }) { ); } -function PrecedentRow({ p }: { p: Precedent }) { +function PrecedentRow({ + p, onEdit, +}: { + p: Precedent; + onEdit: (id: string) => void; +}) { const del = useDeletePrecedent(); const reExtract = useReExtractHalachot(); @@ -105,6 +111,14 @@ function PrecedentRow({ p }: { p: Precedent }) {
+
); } diff --git a/web-ui/src/components/precedents/precedent-edit-sheet.tsx b/web-ui/src/components/precedents/precedent-edit-sheet.tsx new file mode 100644 index 0000000..ed1290e --- /dev/null +++ b/web-ui/src/components/precedents/precedent-edit-sheet.tsx @@ -0,0 +1,309 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { Save, Sparkles, Loader2 } from "lucide-react"; +import { toast } from "sonner"; +import { + Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription, +} from "@/components/ui/sheet"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Textarea } from "@/components/ui/textarea"; +import { Skeleton } from "@/components/ui/skeleton"; +import { + Select, SelectContent, SelectItem, SelectTrigger, SelectValue, +} from "@/components/ui/select"; +import { + usePrecedent, + useUpdatePrecedent, + useReExtractMetadata, + type PracticeArea, + type SourceType, +} from "@/lib/api/precedent-library"; +import { useProgress } from "@/lib/api/documents"; +import { + PRACTICE_AREAS, PRECEDENT_LEVELS, SOURCE_TYPES, +} from "./practice-area"; + +type Props = { + caseLawId: string | null; + onOpenChange: (open: boolean) => void; +}; + +/* All editable fields. Pulled fresh from /api/precedent-library/{id} + * each time the sheet opens so the form reflects any auto-fill that + * happened in the background. 
*/ +type FormState = { + citation: string; + case_name: string; + court: string; + decision_date: string; + practice_area: PracticeArea; + appeal_subtype: string; + source_type: SourceType; + precedent_level: string; + is_binding: boolean; + subject_tags: string; + summary: string; + headnote: string; + key_quote: string; +}; + +const EMPTY: FormState = { + citation: "", case_name: "", court: "", decision_date: "", + practice_area: "", appeal_subtype: "", source_type: "", + precedent_level: "", is_binding: true, subject_tags: "", + summary: "", headnote: "", key_quote: "", +}; + +export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { + const open = caseLawId !== null; + const { data: record, isPending } = usePrecedent(caseLawId); + const update = useUpdatePrecedent(); + const reextractMeta = useReExtractMetadata(); + + const [form, setForm] = useState(EMPTY); + const [metadataTaskId, setMetadataTaskId] = useState(null); + const metadataProgress = useProgress(metadataTaskId); + + // Hydrate form when the record loads. + useEffect(() => { + if (!record) return; + // eslint-disable-next-line react-hooks/set-state-in-effect + setForm({ + citation: record.case_number || "", + case_name: record.case_name || "", + court: record.court || "", + decision_date: record.date ? record.date.slice(0, 10) : "", + practice_area: (record.practice_area || "") as PracticeArea, + appeal_subtype: record.appeal_subtype || "", + source_type: (record.source_type || "") as SourceType, + precedent_level: record.precedent_level || "", + is_binding: record.is_binding ?? 
true, + subject_tags: (record.subject_tags || []).join(", "), + summary: record.summary || "", + headnote: record.headnote || "", + key_quote: (record as { key_quote?: string }).key_quote || "", + }); + }, [record]); + + // Auto-close metadata progress on completion + refresh form + useEffect(() => { + if (metadataProgress?.status === "completed") { + toast.success("חילוץ מטא-דאטה הסתיים — השדות עודכנו"); + setMetadataTaskId(null); + } else if (metadataProgress?.status === "failed") { + toast.error(`חילוץ מטא-דאטה נכשל: ${metadataProgress.error || ""}`); + setMetadataTaskId(null); + } + }, [metadataProgress]); + + const onSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + if (!caseLawId) return; + try { + const patch: Record = { + case_name: form.case_name.trim(), + court: form.court.trim(), + practice_area: form.practice_area || undefined, + appeal_subtype: form.appeal_subtype.trim(), + source_type: form.source_type || undefined, + precedent_level: form.precedent_level || undefined, + is_binding: form.is_binding, + subject_tags: form.subject_tags + .split(",").map((t) => t.trim()).filter(Boolean), + summary: form.summary.trim(), + headnote: form.headnote.trim(), + key_quote: form.key_quote.trim(), + }; + if (form.decision_date) patch.decision_date = form.decision_date; + // citation (case_number) is the unique key; we don't allow editing it + // here to avoid orphaning halachot. To rename, delete + re-upload. + await update.mutateAsync({ id: caseLawId, patch }); + toast.success("נשמר"); + onOpenChange(false); + } catch (err) { + toast.error(err instanceof Error ? err.message : "שגיאה"); + } + }; + + const onTriggerMetadata = async () => { + if (!caseLawId) return; + try { + const res = await reextractMeta.mutateAsync(caseLawId); + setMetadataTaskId(res.task_id); + toast.message("מחלץ מטא-דאטה ברקע…"); + } catch (err) { + toast.error(err instanceof Error ? 
err.message : "שגיאה"); + } + }; + + const isMetaRunning = metadataTaskId !== null + && metadataProgress?.status !== "completed" + && metadataProgress?.status !== "failed"; + + return ( + { if (!o) onOpenChange(false); }}> + + + עריכת פרטי פסיקה + + כל השדות ניתנים לעריכה חוץ ממראה המקום (מזהה ייחודי). + כפתור "חלץ מטא-דאטה אוטומטית" מנתח את הטקסט וממלא רק שדות ריקים. + + + + {isPending || !record ? ( +
+ {[...Array(6)].map((_, i) => )} +
+ ) : ( +
+
+
+
מראה מקום (לא ניתן לעריכה)
+
+ {record.case_number} +
+
+ +
+ + {isMetaRunning && (metadataProgress as { step?: string } | null)?.step && ( +
+ {(metadataProgress as { step?: string }).step} +
+ )} + +
+
+ + setForm({ ...form, case_name: e.target.value })} + placeholder="ערר 403/17 / אהרון ברק" /> +
+
+ + setForm({ ...form, court: e.target.value })} /> +
+
+ + setForm({ ...form, decision_date: e.target.value })} /> +
+
+ + setForm({ ...form, appeal_subtype: e.target.value })} + placeholder="תכנית רחביה / סופיות ההחלטה" /> +
+
+ +
+ +
+ {PRACTICE_AREAS.map((a) => ( + + ))} +
+
+ +
+
+ + +
+
+ + +
+
+ +
+ + setForm({ ...form, subject_tags: e.target.value })} + placeholder="חניה, קווי בניין, שיקול דעת" /> +
+ +
+ +