diff --git a/scripts/cull_principles.py b/scripts/cull_principles.py index 0b13ddc..2d52717 100644 --- a/scripts/cull_principles.py +++ b/scripts/cull_principles.py @@ -50,11 +50,16 @@ _JUDGE_CONCURRENCY = 4 async def _decisions(limit, sample): """case_law ids that have 'original' principles, with source metadata.""" pool = await db.get_pool() + # Resumable: skip decisions already culled (their survivors carry reviewer + # 'cull:%'), so a continuation run after a ceiling-stop never re-judges them. + # (Dry-run never sets that reviewer, so nothing is skipped in dry-run.) rows = await pool.fetch( "SELECT cl.id, cl.case_number, cl.source_kind, cl.is_binding, " " count(*) AS n " "FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id " "WHERE h.instance_type = 'original' AND h.review_status <> 'rejected' " + "AND NOT EXISTS (SELECT 1 FROM halachot h2 WHERE h2.case_law_id = cl.id " + " AND h2.instance_type = 'original' AND h2.reviewer LIKE 'cull:%') " "GROUP BY cl.id, cl.case_number, cl.source_kind, cl.is_binding " "ORDER BY n DESC", )