שני פערים שצפו מ-/precedents בחילוץ-ההלכות:
1. **practice_area לא סומן** — השדה הועבר ל-LLM כקונטקסט-קריאה-בלבד ולא חולץ
מעולם, כך שהעלאות שהשאירו אותו ריק נשארו ריקות והרדיו ב-/precedents הופיע
ללא בחירה. עכשיו נגזר ב-apply_to_record: עדיפות לגזירה דטרמיניסטית מקידומת
מספר-התיק (1xxx→rishuy, 8xxx→היטל, 9xxx→197 — מקור-אמת לדוקטי ועדת-ערר,
INV-AH rule-based), ובנפילה — סיווג-תוכן של ה-LLM (שדה practice_area חדש
בפרומפט, אנום-סגור) עבור פסקי-בית-משפט שהקידומת שלהם אינה מקודדת תחום.
ממלא רק כשריק (G1 — נרמול במקור, לא תיקון-בקריאה).
2. **שם-יו"ר לא חולץ** (למשל 1132-09-24) — המיזוג היה מגודר על
source_kind=='internal_committee' בלבד, ודילג בשקט על החלטות-ועדה שהועלו
במסלול הפסיקה החיצוני (external_upload + source_type=appeals_committee, כמו
החלטת ת"א מנבו) — היו"ר ישב בבלוק-החתימה אך לא חולץ. עכשיו מגודר על "האם זו
החלטת-ועדה" (source_type/level אפקטיביים), לעולם לא על פסק-בית-משפט. ה-CHECK
כופה non-empty רק ל-internal_committee, לכן כתיבה ל-external בטוחה.
חיזוק-פרומפט (לבקשת היו"ר): chair_name מציין מפורשות את בלוק-החתימה הדו-טורי
(מזכיר↔יו"ר — לקחת את צד-היו"ר) ומזהיר לא לחלץ יו"ר של פסקי-דין **מצוטטים**
בגוף ההחלטה.
UI (לוגיקה-בלבד, פטור משער-העיצוב): edit-sheet מסנכרן-מחדש מהרשומה הטרייה בכל
פתיחה (re-arm על סגירה) ו-usePrecedent עושה poll בזמן חילוץ — כך מילוי-רקע של
practice_area/chair_name מופיע בלי refresh מלא ("הכפתור לא נשאר מסומן").
בדיקות: test_metadata_extract_chair_practice_area.py (6 תרחישי-מיזוג, offline).
Invariants: G1 (נרמול-במקור), G2 (אותו extractor, לא מסלול מקביל),
INV-AH (גזירה דטרמיניסטית מועדפת, abstention כשאין ודאות).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
166 lines
6.3 KiB
Python
166 lines
6.3 KiB
Python
"""Regression tests for two metadata-merge gaps surfaced from /precedents:

1. chair_name was filled ONLY for source_kind='internal_committee', so ועדת-ערר
   decisions uploaded via the EXTERNAL precedent path (source_kind='external_upload',
   source_type='appeals_committee' — e.g. 1132-09-24, a Tel-Aviv decision pulled
   from נבו) never got their chair extracted even though it sits in the signature.

2. practice_area (the /precedents radio facet) was never set by extraction — it was
   passed to the LLM as read-only context only. Committee/court uploads that left it
   blank stayed blank, so the radio rendered nothing selected. It is now derived
   deterministically from the case_number prefix (authoritative for ועדת-ערר dockets)
   with the LLM's content classification as the fallback for court dockets whose
   prefix doesn't encode a domain.

Runs fully OFFLINE — monkeypatches the ``db`` calls ``apply_to_record`` makes.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
|
|
from legal_mcp.services import db, precedent_metadata_extractor as pme
|
|
|
|
|
|
def _run(coro):
    """Drive *coro* to completion on a private event loop and return its result.

    A fresh loop is created for every call and is always closed afterwards,
    whether the coroutine returned normally or raised.
    """
    event_loop = asyncio.new_event_loop()
    try:
        outcome = event_loop.run_until_complete(coro)
    finally:
        # Close the loop on both the success and the failure path.
        event_loop.close()
    return outcome
|
|
|
|
|
|
def _wire_db(monkeypatch, record: dict) -> dict:
    """Replace the ``db`` functions used by these tests with in-memory stubs.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture, used to install the stubs.
        record: the row the stubbed ``get_case_law`` hands back (as a copy).

    Returns:
        A dict that accumulates every keyword argument passed to the stubbed
        ``update_case_law``, so a test can inspect which fields were written.
    """
    writes: dict = {}

    async def _get(_cid):
        # Hand out a copy so the caller cannot mutate the fixture record.
        return {**record}

    async def _update(_cid, **fields):
        writes.update(fields)
        merged = dict(record)
        merged.update(fields)
        return merged

    async def _collides(_cn, _cid):
        return False

    def _no_citation(*a, **k):
        return ""

    stubs = (
        ("get_case_law", _get),
        ("update_case_law", _update),
        ("case_number_collides", _collides),
        # citation_formatted is pre-set in every fixture below, so the
        # deterministic formatter is never reached — stub defensively anyway.
        ("format_precedent_citation", _no_citation),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(db, attr_name, stub)

    return writes
|
|
|
|
|
|
def test_external_committee_decision_gets_chair_name(monkeypatch):
    """An externally uploaded appeals-committee decision receives its chair.

    The record has source_kind=external_upload, source_type=appeals_committee
    and an empty chair_name; the suggested chair is expected to be written.
    """
    expected_chair = "מיכל דגני הלברשטם"
    existing_row = dict(
        source_kind="external_upload",
        source_type="appeals_committee",
        case_number="1132-09-24",
        chair_name="",
        district="תל אביב",
        practice_area="rishuy_uvniya",
        citation_formatted="ערר ... 1132-09-24",
    )
    writes = _wire_db(monkeypatch, existing_row)
    llm_suggestion = dict(chair_name=expected_chair, district="תל אביב")

    result = _run(pme.apply_to_record(uuid4(), llm_suggestion))

    assert result["updated"] is True
    assert writes.get("chair_name") == expected_chair
|
|
|
|
|
|
def test_court_ruling_never_gets_chair_name(monkeypatch):
    """A court ruling is not a committee decision, so no chair may be written.

    Even when the suggestion carries a chair_name (a model slip), the field
    must not appear among the values passed to the update call.
    """
    docket = 'ע"א 4768/22'
    court_row = dict(
        source_kind="external_upload",
        source_type="court_ruling",
        precedent_level="עליון",
        case_number=docket,
        chair_name="",
        district="",
        practice_area="betterment_levy",
        citation_formatted=docket,
    )
    writes = _wire_db(monkeypatch, court_row)

    _run(pme.apply_to_record(uuid4(), dict(chair_name="פלוני אלמוני")))

    assert "chair_name" not in writes
|
|
|
|
|
|
def test_practice_area_derived_from_case_number_prefix(monkeypatch):
    """An 8xxx docket yields practice_area=betterment_levy with no LLM input.

    The record's practice_area is blank and the suggestion dict is empty, so
    the written value can only come from the case-number prefix.
    """
    committee_row = dict(
        source_kind="external_upload",
        source_type="appeals_committee",
        case_number="8126-03-25",
        chair_name="פלונית",
        district="ירושלים",
        practice_area="",
        citation_formatted="ערר ... 8126-03-25",
    )
    writes = _wire_db(monkeypatch, committee_row)
    no_suggestions = {}

    result = _run(pme.apply_to_record(uuid4(), no_suggestions))

    assert result["updated"] is True
    assert writes.get("practice_area") == "betterment_levy"
|
|
|
|
|
|
def test_practice_area_falls_back_to_llm_for_court_docket(monkeypatch):
    """A Supreme-Court docket prefix (4xxx) encodes no domain, so the LLM's
    content classification is expected to be written instead.

    The record's practice_area is blank and the suggestion supplies
    'betterment_levy'; that value must reach the update call.
    """
    record = {
        "source_kind": "external_upload",
        "source_type": "court_ruling",
        "precedent_level": "עליון",
        "case_number": 'ע"א 4768/22',
        "chair_name": "",
        "district": "",
        "practice_area": "",
        "citation_formatted": 'ע"א 4768/22',
    }
    captured = _wire_db(monkeypatch, record)
    # The return value is not inspected here; only the captured write matters.
    _run(pme.apply_to_record(uuid4(), {"practice_area": "betterment_levy"}))
    assert captured.get("practice_area") == "betterment_levy"
|
|
|
|
|
|
def test_practice_area_not_overwritten_when_present(monkeypatch):
    """A practice_area already on the record is left untouched.

    Neither the case-number prefix nor a differing LLM suggestion may cause
    practice_area to be written when the record already carries a value.
    """
    committee_row = dict(
        source_kind="external_upload",
        source_type="appeals_committee",
        # The 8xxx prefix on its own would point at betterment_levy ...
        case_number="8126-03-25",
        chair_name="פלונית",
        district="ירושלים",
        # ... but a human already chose 197, and that choice must survive.
        practice_area="compensation_197",
        citation_formatted="ערר ... 8126-03-25",
    )
    writes = _wire_db(monkeypatch, committee_row)
    conflicting_suggestion = dict(practice_area="rishuy_uvniya")

    _run(pme.apply_to_record(uuid4(), conflicting_suggestion))

    assert "practice_area" not in writes
|
|
|
|
|
|
def test_invalid_llm_practice_area_is_dropped(monkeypatch):
    """A non-domain practice_area from the LLM must never be written.

    The suggestion carries the legacy value 'appeals_committee' and the court
    docket offers no usable prefix, so practice_area is expected to be absent
    from the captured update.
    """
    docket = 'ע"א 4768/22'
    court_row = dict(
        source_kind="external_upload",
        source_type="court_ruling",
        precedent_level="עליון",
        case_number=docket,
        chair_name="",
        district="",
        practice_area="",
        citation_formatted=docket,
    )
    writes = _wire_db(monkeypatch, court_row)
    bogus_suggestion = dict(practice_area="appeals_committee")

    _run(pme.apply_to_record(uuid4(), bogus_suggestion))

    assert "practice_area" not in writes
|