Merge pull request 'fix(principles): cull resumability + cap criterion-A (consensus-first) (#152)' (#305) from worktree-canonical-synthesis into main
Merge PR #305: cull resumability + cap criterion-A (#152)
This commit was merged in pull request #305.
This commit is contained in:
@@ -136,7 +136,10 @@ def apply_cap(judged: list[dict], max_new: int | None = None) -> list[dict]:
|
||||
"""
|
||||
max_new = config.HALACHA_PANEL_MAX_NEW if max_new is None else max_new
|
||||
survivors = [j for j in judged if j.get("verdict") in ("approved", "pending_review")]
|
||||
survivors.sort(key=lambda j: j.get("score", 0.0), reverse=True)
|
||||
# Rank consensus-first, then confidence (chaim 2026-06-20): a unanimous 3-vote
|
||||
# principle outranks a 2-vote one regardless of score — cross-model agreement is
|
||||
# the more reliable keep signal (AC1=0.92). Mirrors cluster_candidates' ordering.
|
||||
survivors.sort(key=lambda j: (j.get("votes", 0), j.get("score", 0.0)), reverse=True)
|
||||
keep_ids = {id(j) for j in survivors[:max_new]}
|
||||
out = []
|
||||
for j in judged:
|
||||
|
||||
@@ -106,17 +106,27 @@ def test_cluster_same_model_twice_counts_one_vote_keeps_best_score():
|
||||
assert cl["rule_statement"] == "X"
|
||||
|
||||
|
||||
def test_apply_cap_downgrades_over_cap_survivors_by_score():
|
||||
def test_apply_cap_downgrades_over_cap_survivors_by_votes_then_score():
|
||||
judged = [
|
||||
{"verdict": "approved", "score": 0.9},
|
||||
{"verdict": "approved", "score": 0.7},
|
||||
{"verdict": "pending_review", "score": 0.8},
|
||||
{"verdict": "rejected", "score": 0.95}, # already rejected stays
|
||||
{"verdict": "approved", "votes": 3, "score": 0.9},
|
||||
{"verdict": "approved", "votes": 3, "score": 0.7},
|
||||
{"verdict": "pending_review", "votes": 2, "score": 0.8},
|
||||
{"verdict": "rejected", "votes": 1, "score": 0.95}, # already rejected stays
|
||||
]
|
||||
out = pe.apply_cap(judged, max_new=2)
|
||||
fv = [j["final_verdict"] for j in out]
|
||||
# top-2 survivors by score = 0.9(approved) + 0.8(pending); 0.7 → over cap → rejected
|
||||
assert fv == ["approved", "rejected", "pending_review", "rejected"]
|
||||
# top-2 by (votes,score) = both 3-vote (0.9, 0.7); the 2-vote/0.8 → over cap → rejected
|
||||
assert fv == ["approved", "approved", "rejected", "rejected"]
|
||||
|
||||
|
||||
def test_apply_cap_votes_outrank_score():
    """Consensus dominates confidence when the cap forces a choice.

    With room for a single survivor, the 3-vote/0.80 entry must be kept and
    the 2-vote/0.95 entry must end up rejected, even though it scores higher.
    """
    two_vote_high_score = {"verdict": "approved", "votes": 2, "score": 0.95}
    three_vote_low_score = {"verdict": "approved", "votes": 3, "score": 0.80}

    capped = pe.apply_cap(
        [two_vote_high_score, three_vote_low_score],
        max_new=1,
    )

    # Output order follows input order, so the first verdict belongs to the
    # 2-vote entry and the second to the 3-vote entry.
    final_verdicts = [entry["final_verdict"] for entry in capped]
    assert final_verdicts == ["rejected", "approved"]
|
||||
|
||||
|
||||
def test_apply_cap_keeps_all_when_under_cap():
|
||||
|
||||
@@ -50,11 +50,16 @@ _JUDGE_CONCURRENCY = 4
|
||||
async def _decisions(limit, sample):
|
||||
"""case_law ids that have 'original' principles, with source metadata."""
|
||||
pool = await db.get_pool()
|
||||
# Resumable: skip decisions already culled (their survivors carry reviewer
|
||||
# 'cull:%'), so a continuation run after a ceiling-stop never re-judges them.
|
||||
# (Dry-run never sets that reviewer, so nothing is skipped in dry-run.)
|
||||
rows = await pool.fetch(
|
||||
"SELECT cl.id, cl.case_number, cl.source_kind, cl.is_binding, "
|
||||
" count(*) AS n "
|
||||
"FROM halachot h JOIN case_law cl ON cl.id = h.case_law_id "
|
||||
"WHERE h.instance_type = 'original' AND h.review_status <> 'rejected' "
|
||||
"AND NOT EXISTS (SELECT 1 FROM halachot h2 WHERE h2.case_law_id = cl.id "
|
||||
" AND h2.instance_type = 'original' AND h2.reviewer LIKE 'cull:%') "
|
||||
"GROUP BY cl.id, cl.case_number, cl.source_kind, cl.is_binding "
|
||||
"ORDER BY n DESC",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user