From 46bcaa8fa3b9933810aecfe40365f3828b180d12 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 14 Jun 2026 14:39:46 +0000 Subject: [PATCH] =?UTF-8?q?feat(plans):=20=D7=94=D7=A8=D7=A9=D7=90=D7=95?= =?UTF-8?q?=D7=AA-=D7=A1=D7=95=D7=9B=D7=A0=D7=99=D7=9D=20+=20dedup/merge/e?= =?UTF-8?q?dit=20+=20API=20=D7=9C=D7=AA=D7=95=D7=A8-=D7=90=D7=99=D7=A9?= =?UTF-8?q?=D7=95=D7=A8=20=D7=AA=D7=9B=D7=A0=D7=99=D7=95=D7=AA=20(backend)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-ups למרשם-התכניות (PR #252), חלק ה-backend (ללא-UI): - סוכנים: הוספת extract_plans/plan_get/plan_search/plan_list ל-CEO ול-חוקר (+ plan_upsert לחוקר); plan_review נשאר אנושי בלבד (G10). .claude/agents/*.md. - db: _plan_core_token + find_similar_plans (הצפת כפילות-וריאנט לאישור ידני, בלי מיזוג-אוטומטי), update_plan (עריכה+renumber, guard התנגשות→merge), merge_plans (איחוד aliases, מילוי-חוסר, סתירות→discrepancies, מחיקת מקור). - plans_extractor: צירוף possible_duplicates לפלט החילוץ. - web/app.py: GET /api/plans(+/{id},/{id}/duplicates) · POST /api/plans · PATCH /api/plans/{id} · POST /api/plans/{id}/review · POST /api/plans/merge; +קטגוריית "תכניות הממתינות לאישור" ב-/api/chair/pending. תיקון-נתונים (DB, מחוץ ל-PR): הל/מח/250 ד' → 7.1.2002, י"פ 5045. ה-UI (טאב /precedents + מונה /approvals) ב-PR נפרד אחרי שער Claude Design. Invariants: G1 (נרמול בכתיבה/עריכה) · G2 · G3 · G10 (review_status + מיזוג ידני, אישור אנושי) · INV-DM2/DM5 · INV-AH · X9 · אין בליעה שקטה (discrepancies). Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/agents/legal-ceo.md | 4 + .claude/agents/legal-researcher.md | 5 + mcp-server/src/legal_mcp/services/db.py | 157 +++++++++++++++++ .../src/legal_mcp/services/plans_extractor.py | 17 +- web/app.py | 159 ++++++++++++++++++ 5 files changed, 340 insertions(+), 2 deletions(-) diff --git a/.claude/agents/legal-ceo.md b/.claude/agents/legal-ceo.md index b9290a2..f6c428f 100644 --- a/.claude/agents/legal-ceo.md +++ b/.claude/agents/legal-ceo.md @@ -41,6 +41,10 @@ tools: - mcp__legal-ai__halacha_corroboration - mcp__legal-ai__corroboration_rebuild - mcp__legal-ai__extract_appraiser_facts + - mcp__legal-ai__extract_plans + - mcp__legal-ai__plan_get + - mcp__legal-ai__plan_search + - mcp__legal-ai__plan_list - mcp__legal-ai__write_interim_draft - mcp__legal-ai__export_interim_draft --- diff --git a/.claude/agents/legal-researcher.md b/.claude/agents/legal-researcher.md index 6b6ff25..52edc0d 100644 --- a/.claude/agents/legal-researcher.md +++ b/.claude/agents/legal-researcher.md @@ -37,6 +37,11 @@ tools: - mcp__legal-ai__missing_precedent_create - mcp__legal-ai__missing_precedent_list - mcp__legal-ai__missing_precedent_close + - mcp__legal-ai__extract_plans + - mcp__legal-ai__plan_get + - mcp__legal-ai__plan_search + - mcp__legal-ai__plan_list + - mcp__legal-ai__plan_upsert - mcp__legal-ai__workflow_status --- diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 811207e..9c26ce6 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -3698,6 +3698,163 @@ async def set_plan_review_status(plan_id: UUID, status: str) -> dict | None: return _plan_row_to_dict(row) +# words that describe a plan but aren't its identifying code — stripped for the +# comparable "core token" used in near-duplicate detection (NOT for the key). +_PLAN_DESC_WORDS = re.compile( + r"(?:תכנית|תוכנית|מתאר|מקומית|מחוזית|ארצית|מפורטת|כוללנית|מספר|מס['\"׳״]?)" +) + + +def _plan_core_token(s: str) -> str: + """Best-effort comparable 'core' of a plan identifier — for near-duplicate + surfacing only, NEVER a key (bare numbers collide). Strips descriptive words + (תכנית/מתאר/מקומית/מס') and quotes, keeps the alphanumeric code: + "תכנית מתאר מקומית מס' 62" → "62" · "מי/820" → "מי/820" · "5166/ב" → "5166/ב". + """ + t = _normalize_plan_number(s) + t = _PLAN_DESC_WORDS.sub(" ", t) + t = t.replace('"', " ").replace("'", " ").replace("׳", " ").replace("״", " ") + return " ".join(t.split()).strip() + + +async def find_similar_plans( + plan_number: str, + display_name: str = "", + exclude_id: UUID | None = None, + limit: int = 10, +) -> list[dict]: + """Surface plans that LOOK like the same scheme written differently — so the + chair can merge them (manual gate, G10). Does NOT merge. Each hit carries a + `match_reason`. The registry is small (dozens), so we scan and score in Python. + """ + num = _normalize_plan_number(plan_number) + core = _plan_core_token(plan_number) or _plan_core_token(display_name) + if not num and not core: + return [] + pool = await get_pool() + rows = await pool.fetch("SELECT * FROM plans") + out: list[dict] = [] + exclude = str(exclude_id) if exclude_id else None + for r in rows: + d = _plan_row_to_dict(r) + if exclude and d["id"] == exclude: + continue + cand_num = d["plan_number"] + reason = None + if num and cand_num and (num == cand_num or num in cand_num or cand_num in num): + reason = "מספר חופף" + elif num and num in (d.get("aliases") or []): + reason = "alias תואם" + elif core and core == _plan_core_token(cand_num): + reason = "קוד-ליבה זהה" + if reason: + out.append({**d, "match_reason": reason}) + if len(out) >= limit: + break + return out + + +async def update_plan( + plan_id: UUID, + plan_number: str, + display_name: str = "", + plan_type: str = "", + gazette_date=None, + yalkut_number: str = "", + purpose: str = "", + aliases: list[str] | None = None, +) -> dict | None: + """Edit a plan by id (chair UI). Re-normalizes plan_number (G1) and re-renders + the citation. Refuses to collide with another row's number — that's a merge, not + an edit (no silent dup). `aliases=None` keeps existing aliases. + """ + num = _normalize_plan_number(plan_number) + if not num: + raise ValueError("plan_number ריק לאחר נרמול") + gd = _coerce_plan_date(gazette_date) + display_name = (display_name or "").strip() or num + citation = format_plan_citation({ + "display_name": display_name, "plan_number": num, + "gazette_date": gd, "yalkut_number": yalkut_number, "purpose": purpose, + }) + pool = await get_pool() + async with pool.acquire() as conn: + async with conn.transaction(): + clash = await conn.fetchrow( + "SELECT id FROM plans WHERE plan_number = $1 AND id <> $2", num, plan_id, + ) + if clash: + raise ValueError( + f"מספר-תכנית {num} כבר קיים ברשומה אחרת — השתמש במיזוג (merge), לא בעריכה" + ) + row = await conn.fetchrow( + """UPDATE plans SET + plan_number = $2, display_name = $3, plan_type = $4, + gazette_date = $5, yalkut_number = $6, purpose = $7, + citation_formatted = $8, + aliases = COALESCE($9, aliases), updated_at = now() + WHERE id = $1 RETURNING *""", + plan_id, num, display_name, plan_type or "", gd, + yalkut_number or "", purpose or "", citation, + sorted(set(aliases)) if aliases is not None else None, + ) + return _plan_row_to_dict(row) + + +async def merge_plans(source_id: UUID, target_id: UUID) -> dict: + """Fold `source` into `target` (chair-initiated): union aliases (+ source's + number/name as aliases), fill empty target validity from source, record any + CONFLICTING validity in target.discrepancies (no silent loss), delete source. + Returns the merged target. + """ + if source_id == target_id: + raise ValueError("מקור ויעד זהים") + pool = await get_pool() + async with pool.acquire() as conn: + async with conn.transaction(): + src = await conn.fetchrow("SELECT * FROM plans WHERE id = $1 FOR UPDATE", source_id) + tgt = await conn.fetchrow("SELECT * FROM plans WHERE id = $1 FOR UPDATE", target_id) + if not src or not tgt: + raise ValueError("מקור או יעד לא נמצאו") + src, tgt = dict(src), dict(tgt) + + aliases = set(tgt.get("aliases") or []) | set(src.get("aliases") or []) + for a in (src["plan_number"], src["display_name"]): + if a and a not in (tgt["plan_number"], tgt["display_name"]): + aliases.add(a) + + disc = tgt.get("discrepancies") or [] + if isinstance(disc, str): + disc = json.loads(disc) + for field in ("gazette_date", "yalkut_number", "purpose"): + sv, tv = src[field], tgt[field] + if sv and tv and str(sv) != str(tv): + disc.append({ + "field": field, "old": str(tv), "new": str(sv), + "source_case_number": src.get("source_case_number") or "", + "via": "merge", + }) + + new_gd = tgt["gazette_date"] or src["gazette_date"] + new_yalkut = tgt["yalkut_number"] or src["yalkut_number"] + new_purpose = tgt["purpose"] or src["purpose"] + new_type = tgt["plan_type"] or src["plan_type"] + citation = format_plan_citation({ + "display_name": tgt["display_name"], "plan_number": tgt["plan_number"], + "gazette_date": new_gd, "yalkut_number": new_yalkut, "purpose": new_purpose, + }) + row = await conn.fetchrow( + """UPDATE plans SET aliases = $2, gazette_date = $3, yalkut_number = $4, + purpose = $5, plan_type = $6, citation_formatted = $7, + discrepancies = $8, updated_at = now() + WHERE id = $1 RETURNING *""", + target_id, sorted(aliases), new_gd, new_yalkut, new_purpose, + new_type, citation, json.dumps(disc, ensure_ascii=False), + ) + await conn.execute("DELETE FROM plans WHERE id = $1", source_id) + return _plan_row_to_dict(row) + + # ── V7: External precedent library + halachot ───────────────────── diff --git a/mcp-server/src/legal_mcp/services/plans_extractor.py b/mcp-server/src/legal_mcp/services/plans_extractor.py index 6a79c74..2fe62c8 100644 --- a/mcp-server/src/legal_mcp/services/plans_extractor.py +++ b/mcp-server/src/legal_mcp/services/plans_extractor.py @@ -182,12 +182,25 @@ async def extract_plans_for_case(case_id: UUID) -> dict: "status": "completed", "candidates": len(cands), }) + # Surface near-duplicates for the chair to merge manually (G10) — never + # auto-merged. A variant of an existing plan written differently won't share + # the normalized key, so flag it here instead of silently creating a dup. + plans_out = list(seen_numbers.values()) + dup_hits = 0 + for p in plans_out: + sims = await db.find_similar_plans( + p["plan_number"], p.get("display_name", ""), exclude_id=UUID(p["id"]), + ) + p["possible_duplicates"] = sims + dup_hits += len(sims) + return { "status": "completed", "case_number": source_case_number, "documents_scanned": len(by_doc), "total_candidates": total_candidates, - "distinct_plans": len(seen_numbers), - "plans": list(seen_numbers.values()), + "distinct_plans": len(plans_out), + "possible_duplicate_hits": dup_hits, + "plans": plans_out, "by_document": by_doc, } diff --git a/web/app.py b/web/app.py index c8125b5..e95d879 100644 --- a/web/app.py +++ b/web/app.py @@ -5774,6 +5774,22 @@ async def api_chair_pending(): "sample": [{"text": r["cn"], "source": r["name"]} for r in se_sample], }) + # 6) תכניות הממתינות לאישור (מרשם-התכניות, V38 / G10) — נראות בבלוק ט רק לאחר אישור + pl_count = await conn.fetchval( + "SELECT count(*) FROM plans WHERE review_status='pending_review'") + pl_oldest = await conn.fetchval( + "SELECT min(created_at) FROM plans WHERE review_status='pending_review'") + pl_sample = await conn.fetch( + "SELECT coalesce(display_name, plan_number) AS name, coalesce(purpose,'') AS purpose " + "FROM plans WHERE review_status='pending_review' ORDER BY created_at ASC LIMIT 5") + categories.append({ + "key": "plans", "label": "תכניות הממתינות לאישור", + "description": "תכניות שחולצו אוטומטית מהחלטות — מצוטטות בבלוק ט רק לאחר אישורך.", + "count": pl_count, "severity": "medium" if pl_count else "ok", + "href": "/precedents", "oldest_at": pl_oldest.isoformat() if pl_oldest else None, + "sample": [{"text": r["name"], "source": (r["purpose"] or "")[:80]} for r in pl_sample], + }) + total_pending = sum(c["count"] for c in categories) return { "total_pending": total_pending, @@ -7436,6 +7452,149 @@ async def halacha_batch_review(req: HalachaBatchReviewRequest): return {"updated": updated} +# ── Planning-schemes registry (V38) — מרשם-התכניות ───────────────── +# Chair review queue + manual add/edit/merge for the canonical plans registry. +# LLM-extracted rows arrive pending_review; only approved validity feeds block-tet +# (INV-DM5/G10). Variant duplicates are surfaced, never auto-merged (chair decides). + +_ALLOWED_PLAN_STATUS = {"pending_review", "approved", "rejected"} + + +class PlanUpsertRequest(BaseModel): + plan_number: str + display_name: str = "" + plan_type: str = "" + gazette_date: str = "" # ISO YYYY-MM-DD (empty = unknown) + yalkut_number: str = "" + purpose: str = "" + review_status: str = "approved" # manual chair add → approved by default + aliases: list[str] = [] + + +class PlanEditRequest(BaseModel): + plan_number: str + display_name: str = "" + plan_type: str = "" + gazette_date: str = "" + yalkut_number: str = "" + purpose: str = "" + aliases: list[str] | None = None + + +class PlanReviewRequest(BaseModel): + review_status: str + + +class PlanMergeRequest(BaseModel): + source_id: str + target_id: str + + +@app.get("/api/plans") +async def plans_list(review_status: str = "", q: str = "", limit: int = 500): + """List the plans registry; filter by review_status (queue) or fuzzy q (search).""" + if review_status and review_status not in _ALLOWED_PLAN_STATUS: + raise HTTPException(400, "review_status לא תקין") + if q: + rows = await db.search_plans(q, limit) + else: + rows = await db.list_plans(review_status, limit) + return {"items": rows, "count": len(rows)} + + +@app.get("/api/plans/{plan_id}/duplicates") +async def plan_duplicates(plan_id: str): + """Near-duplicate candidates for a plan — for the chair to merge (G10, no auto-merge).""" + try: + pid = UUID(plan_id) + except ValueError: + raise HTTPException(400, "plan_id לא תקין") + plan = await db.get_plan_by_id(pid) + if not plan: + raise HTTPException(404, "תכנית לא נמצאה") + sims = await db.find_similar_plans( + plan["plan_number"], plan.get("display_name", ""), exclude_id=pid, + ) + return {"items": sims, "count": len(sims)} + + +@app.get("/api/plans/{plan_id}") +async def plan_get(plan_id: str): + try: + pid = UUID(plan_id) + except ValueError: + raise HTTPException(400, "plan_id לא תקין") + plan = await db.get_plan_by_id(pid) + if not plan: + raise HTTPException(404, "תכנית לא נמצאה") + return plan + + +@app.post("/api/plans") +async def plan_create(req: PlanUpsertRequest): + """Manual chair add/upsert (idempotent on normalized plan_number).""" + if req.review_status not in _ALLOWED_PLAN_STATUS: + raise HTTPException(400, "review_status לא תקין") + try: + return await db.upsert_plan( + plan_number=req.plan_number, display_name=req.display_name, + aliases=req.aliases, plan_type=req.plan_type, + gazette_date=req.gazette_date or None, yalkut_number=req.yalkut_number, + purpose=req.purpose, review_status=req.review_status, + model_used="chair_manual", + ) + except ValueError as e: + raise HTTPException(400, str(e)) + + +@app.patch("/api/plans/{plan_id}") +async def plan_edit(plan_id: str, req: PlanEditRequest): + """Edit/fix a plan by id (chair). Refuses a number collision (→ merge instead).""" + try: + pid = UUID(plan_id) + except ValueError: + raise HTTPException(400, "plan_id לא תקין") + try: + row = await db.update_plan( + pid, plan_number=req.plan_number, display_name=req.display_name, + plan_type=req.plan_type, gazette_date=req.gazette_date or None, + yalkut_number=req.yalkut_number, purpose=req.purpose, aliases=req.aliases, + ) + except ValueError as e: + raise HTTPException(400, str(e)) + if not row: + raise HTTPException(404, "תכנית לא נמצאה") + return row + + +@app.post("/api/plans/{plan_id}/review") +async def plan_review(plan_id: str, req: PlanReviewRequest): + """Chair gate (G10): approve / reject / reset.""" + if req.review_status not in _ALLOWED_PLAN_STATUS: + raise HTTPException(400, "review_status לא תקין") + try: + pid = UUID(plan_id) + except ValueError: + raise HTTPException(400, "plan_id לא תקין") + row = await db.set_plan_review_status(pid, req.review_status) + if not row: + raise HTTPException(404, "תכנית לא נמצאה") + return row + + +@app.post("/api/plans/merge") +async def plan_merge(req: PlanMergeRequest): + """Fold source plan into target (chair-initiated dedup); source is deleted.""" + try: + sid, tid = UUID(req.source_id), UUID(req.target_id) + except ValueError: + raise HTTPException(400, "מזהה לא תקין") + try: + return await db.merge_plans(sid, tid) + except ValueError as e: + raise HTTPException(400, str(e)) + + # ── Missing Precedents (TaskMaster #35) ──────────────────────────── # Track citations from party briefs that aren't yet in the precedent # corpus. Researcher logs gaps; chair closes them by uploading the