From fc0c36b2f84c2d824bb522175e4f21b2e21f59ee Mon Sep 17 00:00:00 2001 From: Chaim Date: Tue, 2 Jun 2026 12:09:40 +0000 Subject: [PATCH] fix(#77 backend): make case_number editable + separate citation field on committee upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two identity fixes for the precedent corpus: 1. PrecedentUpdateRequest += case_number — the canonical identifier was not in the edit model, so a wrong id captured at upload (e.g. the full citation pasted into the field) could not be corrected. update_case_law already whitelists case_number. 2. /api/internal-decisions/upload += citation form field — case_number is now the clean identifier (e.g. 8027-25) and citation is the full מראה-מקום, stored as citation_formatted up-front (previously the UI sent the citation AS case_number, leaving the id polluted and citation_formatted empty until extraction). Stored via a post-ingest update_case_law, not the core INSERT. Frontend (separate case_number field in the upload + edit sheets) follows in a second PR after api:types regen. Co-Authored-By: Claude Opus 4.8 (1M context) --- web/app.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/web/app.py b/web/app.py index 9c7e062..df9c159 100644 --- a/web/app.py +++ b/web/app.py @@ -5145,6 +5145,11 @@ def _make_progress_publisher(task_id: str, filename: str): class PrecedentUpdateRequest(BaseModel): + # case_number is the canonical identifier (e.g. "8027-25"). It is editable + # so a wrong identifier captured at upload (e.g. the full citation pasted + # into the field) can be corrected from the edit screen. update_case_law + # already whitelists it. + case_number: str | None = None case_name: str | None = None court: str | None = None decision_date: str | None = None @@ -5488,6 +5493,7 @@ async def internal_decisions_upload( file: UploadFile = File(...), case_number: str = Form(...), case_name: str = Form(""), + citation: str = Form(""), court: str = Form(""), decision_date: str = Form(""), chair_name: str = Form(""), @@ -5498,7 +5504,14 @@ async def internal_decisions_upload( is_binding: bool = Form(True), summary: str = Form(""), ): - """Upload a planning appeals-committee decision to the internal corpus.""" + """Upload a planning appeals-committee decision to the internal corpus. + + ``case_number`` is the canonical identifier (e.g. "8027-25"); ``citation`` + is the full מראה-מקום (e.g. "ערר ... 8027/25 פלוני נ' הוועדה ..."). They + are distinct fields — previously the UI sent the citation as case_number, + leaving the identifier polluted and citation_formatted empty until the + metadata extractor ran. citation is stored as citation_formatted up-front + so it survives even if extraction is delayed.""" if practice_area and practice_area not in _PRACTICE_AREAS: raise HTTPException(400, "practice_area לא תקין") if not case_number.strip(): @@ -5556,6 +5569,20 @@ async def internal_decisions_upload( # — precedent_library_upload and missing-precedent — already do # this; this path was missing it). case_law_id = result.get("case_law_id") if isinstance(result, dict) else None + # Persist the מראה-מקום the chair typed, up-front. The metadata + # extractor only fills citation_formatted when it is empty, so this + # preserves the user's exact citation rather than waiting on (or + # being overwritten by) extraction. + if case_law_id and citation.strip(): + try: + await db.update_case_law( + UUID(case_law_id), citation_formatted=citation.strip() + ) + except Exception: + logger.warning( + "internal-decision %s: storing citation_formatted failed", + case_number, + ) extraction_queued = True if case_law_id: # Route to the correct company CEO. _get_company_id keys off