Merge pull request 'feat(mcp): FU-14 GAP-52 — idempotency על case_create/precedent_attach/document_upload' (#63) from fix/fu14-gap52-idempotency into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m46s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m46s
This commit was merged in pull request #63.
This commit is contained in:
@@ -56,8 +56,8 @@ Zalando *RESTful API Guidelines* | סטטוס: verified
|
|||||||
[G3](00-constitution.md#inv-g3-ingest-אחיד-ו-idempotent). **הנדסי.**
|
[G3](00-constitution.md#inv-g3-ingest-אחיד-ו-idempotent). **הנדסי.**
|
||||||
**מקורות:** Stripe — *Idempotent requests* (https://docs.stripe.com/api/idempotent_requests) ·
|
**מקורות:** Stripe — *Idempotent requests* (https://docs.stripe.com/api/idempotent_requests) ·
|
||||||
Kleppmann *DDIA* (idempotence) · IETF — *Idempotency-Key header* draft (https://datatracker.ietf.org/doc/draft-ietf-httpapi-idempotency-key-header/) | סטטוס: verified
|
Kleppmann *DDIA* (idempotence) · IETF — *Idempotency-Key header* draft (https://datatracker.ietf.org/doc/draft-ietf-httpapi-idempotency-key-header/) | סטטוס: verified
|
||||||
**אכיפה:** upsert/ON CONFLICT בכלי-מוטציה. **כיום חלקי** — `missing_precedent_create`/`precedent_link_cases`/`extract_internal_citations` idempotent; `case_create`/`document_upload`/`precedent_attach` לא.
|
**אכיפה:** upsert/ON CONFLICT (או בדיקת-מפתח ברמת-אפליקציה) בכלי-מוטציה. **GAP-52 ✅ נסגר (2026-06-06):** `case_create` (מפתח case_number, UNIQUE), `precedent_attach` (מפתח case_id+section_id+citation+quote), `document_upload` (מפתח case_id+SHA-256 של הקובץ — מדלג על OCR/embed כפול) — כולם מחזירים את הקיים במקום כפילות. עבור `precedent_attach` ו-`document_upload` נבחרה בדיקת-מפתח ברמת-אפליקציה (ללא UNIQUE-constraint חדש) כדי לא לשבור startup על נתונים-קיימים כפולים; ב-`case_create` הבדיקה מתווספת ל-UNIQUE הקיים על case_number. כלים שהיו idempotent עוד קודם לכן: `missing_precedent_create`/`precedent_link_cases`/`extract_internal_citations`.
|
||||||
**הפרה ידועה:** [gap-audit GAP-52](gap-audit.md).
|
**הפרה ידועה:** —
|
||||||
|
|
||||||
### INV-TOOL4: סימטריית extract/get + persistence
|
### INV-TOOL4: סימטריית extract/get + persistence
|
||||||
**כלל:** לכל כלי-חילוץ שכותב ל-DB יש **כלי-קריאה (get) מקביל**, והפלט **נשמר durably** (לא מוחזר-ונאבד).
|
**כלל:** לכל כלי-חילוץ שכותב ל-DB יש **כלי-קריאה (get) מקביל**, והפלט **נשמר durably** (לא מוחזר-ונאבד).
|
||||||
|
|||||||
@@ -198,7 +198,8 @@
|
|||||||
### FU-14 — חוזה כלי-ה-MCP
|
### FU-14 — חוזה כלי-ה-MCP
|
||||||
- **מכסה:** GAP-44,45,47..54 · **invariants:** INV-TOOL1–TOOL5 · **effort:** L · **תלויות:** FU-1
|
- **מכסה:** GAP-44,45,47..54 · **invariants:** INV-TOOL1–TOOL5 · **effort:** L · **תלויות:** FU-1
|
||||||
- **סוג:** code — envelope אחיד, מיזוג חיפוש/בלוקים, idempotency, limit-caps, get-symmetry, set_outcome SSoT
|
- **סוג:** code — envelope אחיד, מיזוג חיפוש/בלוקים, idempotency, limit-caps, get-symmetry, set_outcome SSoT
|
||||||
- **סטטוס חלקי (פרוסה 1, 2026-06-06):** ✅ **GAP-44** — נוסף `get_appraiser_facts` (ה-get המקביל ל-extract, INV-TOOL4); ✅ **GAP-53** — נוסף `_clamp_limit` (תקרה 200, INV-TOOL5) על ~13 כלי list/search + הוספת limit ל-`list_chair_feedback` (שהיה ללא תקרה). נותר: GAP-45 (status-tool), GAP-48 (envelope), GAP-49/50 (מיזוג+rename — שובר), GAP-51 (set_outcome enum SSoT), GAP-52 (idempotency).
|
- **סטטוס חלקי (פרוסה 1, 2026-06-06):** ✅ **GAP-44** — נוסף `get_appraiser_facts` (ה-get המקביל ל-extract, INV-TOOL4); ✅ **GAP-53** — נוסף `_clamp_limit` (תקרה 200, INV-TOOL5) על ~13 כלי list/search + הוספת limit ל-`list_chair_feedback` (שהיה ללא תקרה).
|
||||||
|
- **סטטוס חלקי (פרוסה 2, 2026-06-06):** ✅ **GAP-52** (INV-TOOL3 idempotency) — `case_create`/`precedent_attach`/`document_upload` מחזירים קיים במקום כפילות (בדיקת-מפתח ברמת-אפליקציה; document_upload לפי SHA-256 → מדלג OCR/embed כפול). נותר: GAP-45 (status-tool), GAP-51 (set_outcome enum SSoT — דורש הכרעת-domain), GAP-48 (envelope), GAP-49/50 (מיזוג+rename — שובר).
|
||||||
|
|
||||||
### FU-15 — deploy/env/secrets
|
### FU-15 — deploy/env/secrets
|
||||||
- **מכסה:** GAP-55..62 · **invariants:** INV-ENV1–ENV5 · **effort:** M · **תלויות:** —
|
- **מכסה:** GAP-55..62 · **invariants:** INV-ENV1–ENV5 · **effort:** M · **תלויות:** —
|
||||||
|
|||||||
@@ -105,6 +105,11 @@ CREATE TABLE IF NOT EXISTS documents (
|
|||||||
created_at TIMESTAMPTZ DEFAULT now()
|
created_at TIMESTAMPTZ DEFAULT now()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- INV-TOOL3 / GAP-52: SHA-256 of the uploaded file bytes, for idempotent upload
|
||||||
|
-- (re-uploading the same file to a case returns the existing document). Empty
|
||||||
|
-- default = legacy rows with unknown hash; never matched as a duplicate.
|
||||||
|
ALTER TABLE documents ADD COLUMN IF NOT EXISTS content_hash text NOT NULL DEFAULT '';
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS document_chunks (
|
CREATE TABLE IF NOT EXISTS document_chunks (
|
||||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||||
document_id UUID REFERENCES documents(id) ON DELETE CASCADE,
|
document_id UUID REFERENCES documents(id) ON DELETE CASCADE,
|
||||||
@@ -1471,19 +1476,37 @@ async def create_document(
|
|||||||
title: str,
|
title: str,
|
||||||
file_path: str,
|
file_path: str,
|
||||||
page_count: int | None = None,
|
page_count: int | None = None,
|
||||||
|
content_hash: str = "",
|
||||||
) -> dict:
|
) -> dict:
|
||||||
pool = await get_pool()
|
pool = await get_pool()
|
||||||
doc_id = uuid4()
|
doc_id = uuid4()
|
||||||
async with pool.acquire() as conn:
|
async with pool.acquire() as conn:
|
||||||
await conn.execute(
|
await conn.execute(
|
||||||
"""INSERT INTO documents (id, case_id, doc_type, title, file_path, page_count)
|
"""INSERT INTO documents (id, case_id, doc_type, title, file_path, page_count, content_hash)
|
||||||
VALUES ($1, $2, $3, $4, $5, $6)""",
|
VALUES ($1, $2, $3, $4, $5, $6, $7)""",
|
||||||
doc_id, case_id, doc_type, title, file_path, page_count,
|
doc_id, case_id, doc_type, title, file_path, page_count, content_hash,
|
||||||
)
|
)
|
||||||
row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id)
|
row = await conn.fetchrow("SELECT * FROM documents WHERE id = $1", doc_id)
|
||||||
return _row_to_doc(row)
|
return _row_to_doc(row)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_document_by_hash(case_id: UUID, content_hash: str) -> dict | None:
|
||||||
|
"""Return an existing document for this case with the same file hash, or None.
|
||||||
|
|
||||||
|
INV-TOOL3 / GAP-52: deterministic key for idempotent upload. Empty hashes
|
||||||
|
(legacy rows) are never matched.
|
||||||
|
"""
|
||||||
|
if not content_hash:
|
||||||
|
return None
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
row = await conn.fetchrow(
|
||||||
|
"SELECT * FROM documents WHERE case_id = $1 AND content_hash = $2 LIMIT 1",
|
||||||
|
case_id, content_hash,
|
||||||
|
)
|
||||||
|
return _row_to_doc(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
async def update_document(doc_id: UUID, **fields) -> None:
|
async def update_document(doc_id: UUID, **fields) -> None:
|
||||||
if not fields:
|
if not fields:
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -153,6 +153,13 @@ async def case_create(
|
|||||||
ריק = יוסק אוטומטית ממספר התיק
|
ריק = יוסק אוטומטית ממספר התיק
|
||||||
proceeding_type: 'ערר' / 'בל"מ'. ריק = יוסק מ-appeal_subtype/subject.
|
proceeding_type: 'ערר' / 'בל"מ'. ריק = יוסק מ-appeal_subtype/subject.
|
||||||
"""
|
"""
|
||||||
|
# INV-TOOL3 / GAP-52: idempotent on case_number (already UNIQUE in schema).
|
||||||
|
# Re-creating an existing case returns it instead of raising a unique-violation.
|
||||||
|
_existing = await db.get_case_by_number(case_number)
|
||||||
|
if _existing:
|
||||||
|
_existing["idempotent_existing"] = True
|
||||||
|
return json.dumps(_existing, default=str, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
from datetime import date as date_type
|
from datetime import date as date_type
|
||||||
|
|
||||||
h_date = None
|
h_date = None
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -37,6 +38,19 @@ async def document_upload(
|
|||||||
if not title:
|
if not title:
|
||||||
title = source.stem
|
title = source.stem
|
||||||
|
|
||||||
|
# INV-TOOL3 / GAP-52: idempotent on (case_id, file content hash). Re-uploading
|
||||||
|
# the same bytes returns the existing document and skips re-copy + re-OCR +
|
||||||
|
# re-embed (the expensive part).
|
||||||
|
content_hash = hashlib.sha256(source.read_bytes()).hexdigest()
|
||||||
|
existing_doc = await db.get_document_by_hash(case_id, content_hash)
|
||||||
|
if existing_doc:
|
||||||
|
return json.dumps({
|
||||||
|
"status": "exists",
|
||||||
|
"message": f"הקובץ כבר הועלה לתיק {case_number} (זהה ב-hash) — מוחזר הקיים, ללא עיבוד מחדש.",
|
||||||
|
"document": existing_doc,
|
||||||
|
"idempotent_existing": True,
|
||||||
|
}, ensure_ascii=False, indent=2, default=str)
|
||||||
|
|
||||||
# Copy file to case directory
|
# Copy file to case directory
|
||||||
case_dir = config.find_case_dir(case_number) / "documents" / "originals"
|
case_dir = config.find_case_dir(case_number) / "documents" / "originals"
|
||||||
case_dir.mkdir(parents=True, exist_ok=True)
|
case_dir.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -52,6 +66,7 @@ async def document_upload(
|
|||||||
doc_type=initial_doc_type,
|
doc_type=initial_doc_type,
|
||||||
title=title,
|
title=title,
|
||||||
file_path=str(dest),
|
file_path=str(dest),
|
||||||
|
content_hash=content_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process document (extract → classify → chunk → embed → store)
|
# Process document (extract → classify → chunk → embed → store)
|
||||||
|
|||||||
@@ -43,6 +43,14 @@ async def precedent_attach(
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return json.dumps({"error": "pdf_document_id לא תקין"}, ensure_ascii=False)
|
return json.dumps({"error": "pdf_document_id לא תקין"}, ensure_ascii=False)
|
||||||
|
|
||||||
|
# INV-TOOL3 / GAP-52: idempotent on (case_id, section_id, citation, quote).
|
||||||
|
# Re-attaching the same quote to the same section returns the existing row.
|
||||||
|
for _p in await db.list_case_precedents(UUID(case["id"])):
|
||||||
|
if (_p.get("citation") == citation and _p.get("quote") == quote
|
||||||
|
and (_p.get("section_id") or None) == (section_id or None)):
|
||||||
|
_p["idempotent_existing"] = True
|
||||||
|
return json.dumps(_p, ensure_ascii=False, indent=2, default=str)
|
||||||
|
|
||||||
row = await db.create_case_precedent(
|
row = await db.create_case_precedent(
|
||||||
case_id=UUID(case["id"]),
|
case_id=UUID(case["id"]),
|
||||||
quote=quote,
|
quote=quote,
|
||||||
|
|||||||
Reference in New Issue
Block a user