From bfc034b44cadaf06439d16dc348235cb368d0087 Mon Sep 17 00:00:00 2001 From: Chaim Date: Fri, 19 Jun 2026 14:21:27 +0000 Subject: [PATCH] =?UTF-8?q?fix(principles):=20cull=20skips=20already-culle?= =?UTF-8?q?d=20decisions=20=E2=80=94=20resumable=20across=20ceiling-stops?= =?UTF-8?q?=20(#152)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/cull_principles.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/cull_principles.py b/scripts/cull_principles.py index 0b13ddc..2d52717 100644 --- a/scripts/cull_principles.py +++ b/scripts/cull_principles.py @@ -50,11 +50,16 @@ _JUDGE_CONCURRENCY = 4 async def _decisions(limit, sample): """case_law ids that have 'original' principles, with source metadata.""" pool = await db.get_pool() + # Resumable: skip decisions already culled (their survivors carry reviewer + # 'cull:%'), so a continuation run after a ceiling-stop never re-judges them. + # (Dry-run never sets that reviewer, so nothing is skipped in dry-run.) rows = await pool.fetch( "SELECT cl.id, cl.case_number, cl.source_kind, cl.is_binding, " " count(*) AS n " "FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id " "WHERE h.instance_type = 'original' AND h.review_status <> 'rejected' " + "AND NOT EXISTS (SELECT 1 FROM halachot h2 WHERE h2.case_law_id = cl.id " + " AND h2.instance_type = 'original' AND h2.reviewer LIKE 'cull:%') " "GROUP BY cl.id, cl.case_number, cl.source_kind, cl.is_binding " "ORDER BY n DESC", )