From c8344342a81a6f00c9a4a78275086ee8870e135a Mon Sep 17 00:00:00 2001
From: Chaim <chaim@marcus-law.co.il>
Date: Mon, 8 Jun 2026 10:57:57 +0000
Subject: [PATCH] =?UTF-8?q?fix(style-panel):=20idempotency=20+=20dedup=20?=
 =?UTF-8?q?=E2=80=94=20re-running=20--apply=20never=20duplicates=20lessons?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

style_lesson_panel.py: before writing 2/2-keep lessons, skip any whose
whitespace-normalized lesson_text already exists on the corpus (any source), and
collapse duplicates within a run. This makes the run-learning button safe to click
repeatedly (the curator may re-run the pipeline): it converges instead of piling up
duplicate decision_lessons.

Verified on בל"מ 8126-03-25: re-running --apply with 7 existing lessons wrote 0
("1 כפילויות דולגו"), count stayed 7.

Invariants: INV-LRN1/G10 unchanged (proposals only, manual fold).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 scripts/style_lesson_panel.py | 38 +++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/scripts/style_lesson_panel.py b/scripts/style_lesson_panel.py
index eaa1da5..2533298 100644
--- a/scripts/style_lesson_panel.py
+++ b/scripts/style_lesson_panel.py
@@ -199,6 +199,23 @@ async def _resolve_corpus_id(decision_number: str) -> str | None:
     return str(row["id"]) if row else None
 
 
+def _norm(text: str) -> str:
+    """Dedup key for a lesson: whitespace runs become one space, ends are trimmed; falsy -> ''."""
+    return " ".join((text or "").split())
+
+
+async def _existing_lesson_texts(corpus_id: str) -> set[str]:
+    """Return the _norm()-ed lesson_text of every decision_lessons row whose style_corpus_id
+    is UUID(corpus_id), whatever its source — the baseline that --apply dedups against."""
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            "SELECT lesson_text FROM decision_lessons WHERE style_corpus_id = $1",
+            UUID(corpus_id),
+        )
+    return {_norm(r["lesson_text"]) for r in rows}
+
+
 async def _load_pair(args) -> dict | None:
     if args.pair_id:
         return await db.get_draft_final_pair(UUID(args.pair_id))
@@ -281,6 +298,19 @@ async def main(args: argparse.Namespace) -> int:
         return 1
 
     keeps = [r for r in results if r["_verdict"] == "agree_yes" and _lesson_text(r["_change"])]
+
+    # Idempotency / dedup — skip keeps already attached to the corpus (any source),
+    # and collapse duplicates WITHIN this run. Re-running --apply writes nothing new.
+    existing = await _existing_lesson_texts(corpus_id)
+    fresh, seen = [], set(existing)
+    for r in keeps:
+        n = _norm(_lesson_text(r["_change"]))
+        if n in seen:
+            continue
+        seen.add(n)
+        fresh.append(r)
+    skipped_dup = len(keeps) - len(fresh)
+
     ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
     audit = Path(__file__).resolve().parent.parent / "data" / "audit"
     audit.mkdir(parents=True, exist_ok=True)
@@ -288,12 +318,12 @@ async def main(args: argparse.Namespace) -> int:
     with backup.open("w", encoding="utf-8", newline="") as f:
         w = csv.writer(f)
         w.writerow(["corpus_id", "category", "source", "lesson_text"])
-        for r in keeps:
+        for r in fresh:
             w.writerow([corpus_id, _category(r["_change"]), "panel:deepseek+gemini",
                         _lesson_text(r["_change"])])
 
     written = 0
-    for r in keeps:
+    for r in fresh:
         await db.add_decision_lesson(
             UUID(corpus_id),
             lesson_text=_lesson_text(r["_change"]),
@@ -305,8 +335,8 @@ async def main(args: argparse.Namespace) -> int:
 
     chair = cc["split"] + cc["incomplete"]
     print(f"\nAPPLIED (reversible): wrote {written} decision_lesson proposals "
-          f"(source=panel:deepseek+gemini) · {chair} escalated to chair · "
-          f"{len(substance)} substance skipped")
+          f"(source=panel:deepseek+gemini) · {skipped_dup} כפילויות דולגו · "
+          f"{chair} escalated to chair · {len(substance)} substance skipped")
     print(f"backup → {backup}")
     print("NB: fold into SKILL.md / legal-decision-lessons.md stays a manual chair gate (INV-G10).")
     return 0