"""Regression tests for two metadata-merge gaps surfaced from /precedents: 1. chair_name was filled ONLY for source_kind='internal_committee', so ועדת-ערר decisions uploaded via the EXTERNAL precedent path (source_kind='external_upload', source_type='appeals_committee' — e.g. 1132-09-24, a Tel-Aviv decision pulled from נבו) never got their chair extracted even though it sits in the signature. 2. practice_area (the /precedents radio facet) was never set by extraction — it was passed to the LLM as read-only context only. Committee/court uploads that left it blank stayed blank, so the radio rendered nothing selected. It is now derived deterministically from the case_number prefix (authoritative for ועדת-ערר dockets) with the LLM's content classification as the fallback for court dockets whose prefix doesn't encode a domain. Runs fully OFFLINE — monkeypatches the ``db`` calls ``apply_to_record`` makes. """ from __future__ import annotations import asyncio from uuid import uuid4 import pytest from legal_mcp.services import db, precedent_metadata_extractor as pme def _run(coro): loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) finally: loop.close() def _wire_db(monkeypatch, record: dict) -> dict: """Stub the db calls apply_to_record makes; return a dict that captures the kwargs passed to update_case_law.""" captured: dict = {} async def _get(_cid): return dict(record) async def _update(_cid, **fields): captured.update(fields) return {**record, **fields} async def _collides(_cn, _cid): return False monkeypatch.setattr(db, "get_case_law", _get) monkeypatch.setattr(db, "update_case_law", _update) monkeypatch.setattr(db, "case_number_collides", _collides) # citation_formatted is pre-set in every fixture below, so the deterministic # formatter is never reached — stub defensively anyway. monkeypatch.setattr(db, "format_precedent_citation", lambda *a, **k: "") return captured def test_external_committee_decision_gets_chair_name(monkeypatch): """source_kind=external_upload + source_type=appeals_committee → chair filled.""" record = { "source_kind": "external_upload", "source_type": "appeals_committee", "case_number": "1132-09-24", "chair_name": "", "district": "תל אביב", "practice_area": "rishuy_uvniya", "citation_formatted": "ערר ... 1132-09-24", } captured = _wire_db(monkeypatch, record) suggested = {"chair_name": "מיכל דגני הלברשטם", "district": "תל אביב"} out = _run(pme.apply_to_record(uuid4(), suggested)) assert out["updated"] is True assert captured.get("chair_name") == "מיכל דגני הלברשטם" def test_court_ruling_never_gets_chair_name(monkeypatch): """A court ruling is not a committee decision — chair must stay empty even if the model slips and returns one.""" record = { "source_kind": "external_upload", "source_type": "court_ruling", "precedent_level": "עליון", "case_number": 'ע"א 4768/22', "chair_name": "", "district": "", "practice_area": "betterment_levy", "citation_formatted": 'ע"א 4768/22', } captured = _wire_db(monkeypatch, record) suggested = {"chair_name": "פלוני אלמוני"} _run(pme.apply_to_record(uuid4(), suggested)) assert "chair_name" not in captured def test_practice_area_derived_from_case_number_prefix(monkeypatch): """8xxx docket → betterment_levy, deterministically, even if the LLM suggested nothing (or something else).""" record = { "source_kind": "external_upload", "source_type": "appeals_committee", "case_number": "8126-03-25", "chair_name": "פלונית", "district": "ירושלים", "practice_area": "", "citation_formatted": "ערר ... 8126-03-25", } captured = _wire_db(monkeypatch, record) out = _run(pme.apply_to_record(uuid4(), {})) assert out["updated"] is True assert captured.get("practice_area") == "betterment_levy" def test_practice_area_falls_back_to_llm_for_court_docket(monkeypatch): """A Supreme-Court docket prefix (4xxx) encodes no domain → use the LLM's content classification.""" record = { "source_kind": "external_upload", "source_type": "court_ruling", "precedent_level": "עליון", "case_number": 'ע"א 4768/22', "chair_name": "", "district": "", "practice_area": "", "citation_formatted": 'ע"א 4768/22', } captured = _wire_db(monkeypatch, record) out = _run(pme.apply_to_record(uuid4(), {"practice_area": "betterment_levy"})) assert captured.get("practice_area") == "betterment_levy" def test_practice_area_not_overwritten_when_present(monkeypatch): """An existing practice_area (chair-set or earlier derivation) is preserved — the prefix derivation only fills the blank.""" record = { "source_kind": "external_upload", "source_type": "appeals_committee", "case_number": "8126-03-25", # prefix would say betterment_levy "chair_name": "פלונית", "district": "ירושלים", "practice_area": "compensation_197", # but a human said 197 — keep it "citation_formatted": "ערר ... 8126-03-25", } captured = _wire_db(monkeypatch, record) _run(pme.apply_to_record(uuid4(), {"practice_area": "rishuy_uvniya"})) assert "practice_area" not in captured def test_invalid_llm_practice_area_is_dropped(monkeypatch): """The LLM returning a non-domain value (legacy 'appeals_committee' / free text) must not be written — and with no usable prefix, practice_area stays blank.""" record = { "source_kind": "external_upload", "source_type": "court_ruling", "precedent_level": "עליון", "case_number": 'ע"א 4768/22', "chair_name": "", "district": "", "practice_area": "", "citation_formatted": 'ע"א 4768/22', } captured = _wire_db(monkeypatch, record) _run(pme.apply_to_record(uuid4(), {"practice_area": "appeals_committee"})) assert "practice_area" not in captured