fix(principles): cap ranks consensus-first (votes, then score) — criterion A (#152)
chaim 2026-06-20: A unanimous 3-vote principle must outrank a 2-vote one regardless of score, because cross-model agreement is the more reliable keep signal. `apply_cap` now sorts survivors by (votes, score), matching `cluster_candidates`.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -106,17 +106,27 @@ def test_cluster_same_model_twice_counts_one_vote_keeps_best_score():
|
||||
assert cl["rule_statement"] == "X"
|
||||
|
||||
|
||||
def test_apply_cap_downgrades_over_cap_survivors_by_score():
|
||||
def test_apply_cap_downgrades_over_cap_survivors_by_votes_then_score():
|
||||
judged = [
|
||||
{"verdict": "approved", "score": 0.9},
|
||||
{"verdict": "approved", "score": 0.7},
|
||||
{"verdict": "pending_review", "score": 0.8},
|
||||
{"verdict": "rejected", "score": 0.95}, # already rejected stays
|
||||
{"verdict": "approved", "votes": 3, "score": 0.9},
|
||||
{"verdict": "approved", "votes": 3, "score": 0.7},
|
||||
{"verdict": "pending_review", "votes": 2, "score": 0.8},
|
||||
{"verdict": "rejected", "votes": 1, "score": 0.95}, # already rejected stays
|
||||
]
|
||||
out = pe.apply_cap(judged, max_new=2)
|
||||
fv = [j["final_verdict"] for j in out]
|
||||
# top-2 survivors by score = 0.9(approved) + 0.8(pending); 0.7 → over cap → rejected
|
||||
assert fv == ["approved", "rejected", "pending_review", "rejected"]
|
||||
# top-2 by (votes,score) = both 3-vote (0.9, 0.7); the 2-vote/0.8 → over cap → rejected
|
||||
assert fv == ["approved", "approved", "rejected", "rejected"]
|
||||
|
||||
|
||||
def test_apply_cap_votes_outrank_score():
    """Consensus dominates confidence when the cap leaves room for one entry.

    A 2-vote/0.95 candidate must NOT beat a 3-vote/0.80 candidate: with
    ``max_new=1`` the 3-vote entry keeps its verdict and the 2-vote entry
    is expected to come back as rejected.
    """
    two_vote_high_score = {"verdict": "approved", "votes": 2, "score": 0.95}
    three_vote_low_score = {"verdict": "approved", "votes": 3, "score": 0.80}

    capped = pe.apply_cap(
        [two_vote_high_score, three_vote_low_score],
        max_new=1,
    )

    # Verdicts are compared in input order, not in ranking order.
    final_verdicts = [entry["final_verdict"] for entry in capped]
    assert final_verdicts == ["rejected", "approved"]
|
||||
|
||||
|
||||
def test_apply_cap_keeps_all_when_under_cap():
|
||||
|
||||
Reference in New Issue
Block a user