fix(style-panel): idempotency + dedup — הרצה חוזרת לא משכפלת לקחים #162
@@ -199,6 +199,23 @@ async def _resolve_corpus_id(decision_number: str) -> str | None:
|
|||||||
return str(row["id"]) if row else None
|
return str(row["id"]) if row else None
|
||||||
|
|
||||||
|
|
||||||
|
def _norm(text: str) -> str:
|
||||||
|
"""Normalize a lesson for dedup — collapse whitespace, strip."""
|
||||||
|
return " ".join((text or "").split())
|
||||||
|
|
||||||
|
|
||||||
|
async def _existing_lesson_texts(corpus_id: str) -> set[str]:
    """Collect the normalized lesson texts already stored for a corpus.

    The query does not filter on source, so lessons from every source are
    included. Callers compare against this set so that a repeated --apply
    run does not insert a lesson a second time.

    Args:
        corpus_id: String form of the corpus UUID; it is converted with
            ``UUID(...)`` before being bound to the query.

    Returns:
        The set of ``_norm``-normalized ``lesson_text`` values found in
        ``decision_lessons`` for that ``style_corpus_id``.
    """
    query = "SELECT lesson_text FROM decision_lessons WHERE style_corpus_id = $1"
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, UUID(corpus_id))

    normalized: set[str] = set()
    for record in records:
        normalized.add(_norm(record["lesson_text"]))
    return normalized
|
||||||
|
|
||||||
|
|
||||||
async def _load_pair(args) -> dict | None:
|
async def _load_pair(args) -> dict | None:
|
||||||
if args.pair_id:
|
if args.pair_id:
|
||||||
return await db.get_draft_final_pair(UUID(args.pair_id))
|
return await db.get_draft_final_pair(UUID(args.pair_id))
|
||||||
@@ -281,6 +298,19 @@ async def main(args: argparse.Namespace) -> int:
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
keeps = [r for r in results if r["_verdict"] == "agree_yes" and _lesson_text(r["_change"])]
|
keeps = [r for r in results if r["_verdict"] == "agree_yes" and _lesson_text(r["_change"])]
|
||||||
|
|
||||||
|
# Idempotency / dedup — skip keeps already attached to the corpus (any source),
|
||||||
|
# and collapse duplicates WITHIN this run. Re-running --apply writes nothing new.
|
||||||
|
existing = await _existing_lesson_texts(corpus_id)
|
||||||
|
fresh, seen = [], set(existing)
|
||||||
|
for r in keeps:
|
||||||
|
n = _norm(_lesson_text(r["_change"]))
|
||||||
|
if n in seen:
|
||||||
|
continue
|
||||||
|
seen.add(n)
|
||||||
|
fresh.append(r)
|
||||||
|
skipped_dup = len(keeps) - len(fresh)
|
||||||
|
|
||||||
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
||||||
audit = Path(__file__).resolve().parent.parent / "data" / "audit"
|
audit = Path(__file__).resolve().parent.parent / "data" / "audit"
|
||||||
audit.mkdir(parents=True, exist_ok=True)
|
audit.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -288,12 +318,12 @@ async def main(args: argparse.Namespace) -> int:
|
|||||||
with backup.open("w", encoding="utf-8", newline="") as f:
|
with backup.open("w", encoding="utf-8", newline="") as f:
|
||||||
w = csv.writer(f)
|
w = csv.writer(f)
|
||||||
w.writerow(["corpus_id", "category", "source", "lesson_text"])
|
w.writerow(["corpus_id", "category", "source", "lesson_text"])
|
||||||
for r in keeps:
|
for r in fresh:
|
||||||
w.writerow([corpus_id, _category(r["_change"]), "panel:deepseek+gemini",
|
w.writerow([corpus_id, _category(r["_change"]), "panel:deepseek+gemini",
|
||||||
_lesson_text(r["_change"])])
|
_lesson_text(r["_change"])])
|
||||||
|
|
||||||
written = 0
|
written = 0
|
||||||
for r in keeps:
|
for r in fresh:
|
||||||
await db.add_decision_lesson(
|
await db.add_decision_lesson(
|
||||||
UUID(corpus_id),
|
UUID(corpus_id),
|
||||||
lesson_text=_lesson_text(r["_change"]),
|
lesson_text=_lesson_text(r["_change"]),
|
||||||
@@ -305,8 +335,8 @@ async def main(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
chair = cc["split"] + cc["incomplete"]
|
chair = cc["split"] + cc["incomplete"]
|
||||||
print(f"\nAPPLIED (reversible): wrote {written} decision_lesson proposals "
|
print(f"\nAPPLIED (reversible): wrote {written} decision_lesson proposals "
|
||||||
f"(source=panel:deepseek+gemini) · {chair} escalated to chair · "
|
f"(source=panel:deepseek+gemini) · {skipped_dup} כפילויות דולגו · "
|
||||||
f"{len(substance)} substance skipped")
|
f"{chair} escalated to chair · {len(substance)} substance skipped")
|
||||||
print(f"backup → {backup}")
|
print(f"backup → {backup}")
|
||||||
print("NB: fold into SKILL.md / legal-decision-lessons.md stays a manual chair gate (INV-G10).")
|
print("NB: fold into SKILL.md / legal-decision-lessons.md stays a manual chair gate (INV-G10).")
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
Reference in New Issue
Block a user