feat(principles): retroactive cull (Phase C) + source-derived terminology (Phase D, #152)
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 4s
Lint — undefined names / undefined-names (pull_request) Successful in 11s

Phase C — scripts/cull_principles.py: re-adjudicates every existing 'original'
principle with the SAME panel regime (panel_keep_score → classify → apply_cap),
reversible (CSV backup + rejected canonical recoverable), usage-throttled.
panel_extraction.panel_keep_score + apply_cap (shared, G2). Dry-run on 3
decisions: 37→15 survive.

Phase D — services/principles.py: source-derived label הלכה (binding court) /
כלל פרשני (committee) / עיקרון (persuasive); umbrella עקרונות משפטיים. Wired into
canonical_halacha_get/list (principle_class+principle_label). UI string changes
deferred to the Claude Design gate. spec INV-LRN7; SCRIPTS.md; 7 new tests; 428 green.

Phase E needs no new code — synthesis already targets pending_synthesis, which the
cull leaves only on survivors (rejected canonicals → 'rejected').

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-19 11:14:59 +00:00
parent 6b2fd562ae
commit 4ca907b97f
9 changed files with 426 additions and 14 deletions

View File

@@ -126,6 +126,29 @@ def classify(votes: int, score: float) -> str:
return "rejected"
def apply_cap(judged: list[dict], max_new: int | None = None) -> list[dict]:
"""Per-decision cap for the retroactive cull (#152, Phase C).
``judged`` = a decision's principles, each with a panel ``verdict`` + ``score``.
Survivors (approved/pending_review) are ranked by score; those beyond ``max_new``
are downgraded to 'rejected' (over-cap). Already-rejected stay rejected. Returns
a new list with ``final_verdict`` set on each (order preserved). Pure.
"""
max_new = config.HALACHA_PANEL_MAX_NEW if max_new is None else max_new
survivors = [j for j in judged if j.get("verdict") in ("approved", "pending_review")]
survivors.sort(key=lambda j: j.get("score", 0.0), reverse=True)
keep_ids = {id(j) for j in survivors[:max_new]}
out = []
for j in judged:
v = j.get("verdict")
if v in ("approved", "pending_review") and id(j) not in keep_ids:
final = "rejected" # over the cap
else:
final = v
out.append({**j, "final_verdict": final})
return out
def cluster_candidates(
per_model: dict[str, list[dict]], embs: dict[int, list[float]],
) -> list[dict]:
@@ -195,6 +218,63 @@ def cluster_candidates(
return out
def _keep_score_system(source_kind: str, is_binding: bool) -> str:
if source_kind == "internal_committee":
nature = ("המקור הוא החלטת ועדת-ערר (מיישמת דין, אינה יוצרת הלכה). ראוי-לשמירה = "
"כלל פרשני חדש ובר-הכללה שהוועדה גיבשה; לא-ראוי = יישום תלוי-עובדות, "
"חזרה על דין מוכר, אמרת-אגב, או חזרה מילולית על הציטוט.")
else:
nature = ("ראוי-לשמירה = עיקרון משפטי בר-הכללה והסתמכות (הלכה/פרשנות/כלל-פרוצדורלי); "
"לא-ראוי = החלה תלוית-עובדות, אמרת-אגב, או חזרה מילולית על הציטוט.")
return (
"אתה משפטן בכיר בוועדת ערר לתכנון ובנייה. הוכרע אם עיקרון שחולץ מפסיקה ראוי "
f"להישמר כתקדים בר-ציטוט. {nature}\n"
"תן גם ציון-ביטחון 0-1 לכך שזהו עיקרון בר-הסתמכות אמיתי.\n"
'החזר JSON בלבד: {"keep": true/false, "score": 0.0-1.0, "reason": "<משפט קצר>"}. ללא markdown.'
)
async def panel_keep_score(
    rule_statement: str,
    supporting_quote: str,
    reasoning_summary: str = "",
    *,
    source_kind: str = "external_upload",
    is_binding: bool = True,
) -> dict:
    """Run the 3-judge panel on ONE existing principle (Phase C cull, #152).

    The three judges (claude, deepseek, gemini) are queried concurrently with
    the same system and user prompt. Each judge votes keep + score;
    ``votes`` is the number of keepers and ``score`` is the mean of the
    keepers' scores (chaim: "the mean of the voters"), rounded to 4 places.
    The verdict comes from the shared :func:`classify`.

    A keeper's score is clamped to [0, 1]; a missing, unparseable or NaN
    score counts as 0.0.

    Used by the retroactive cull; the extractor uses :func:`panel_extract`
    instead.

    Args:
        rule_statement: the principle's wording.
        supporting_quote: the quote backing the principle.
        reasoning_summary: optional reasoning text included in the prompt.
        source_kind: forwarded to :func:`_keep_score_system`.
        is_binding: forwarded to :func:`_keep_score_system`.

    Returns:
        ``{votes, score, verdict, voters, per_judge}`` — ``voters`` is the
        sorted list of judge names that voted keep; ``per_judge`` keeps the
        raw replies for the active-learning round (FU-1).
    """
    import asyncio
    import math

    system = _keep_score_system(source_kind, is_binding)
    user = (f"ניסוח העיקרון:\n{rule_statement}\n\n"
            f"היגיון:\n{reasoning_summary}\n\nציטוט תומך:\n{supporting_quote}")

    # One HTTP client is shared by the two judges that take one.
    async with httpx.AsyncClient() as client:
        c, ds, gm = await asyncio.gather(
            panel_judges.judge_claude(system, user, max_tokens=300),
            panel_judges.judge_deepseek(client, system, user, max_tokens=300),
            # NOTE(review): gemini gets a larger token budget than the other
            # two; the reason is not visible here — confirm before changing.
            panel_judges.judge_gemini(client, system, user, max_tokens=2000),
        )
    raw = {"claude": c, "deepseek": ds, "gemini": gm}

    keepers, scores = [], []
    for name, reply in raw.items():
        if not panel_judges.to_bool(reply, "keep"):
            continue
        keepers.append(name)
        try:
            value = float(reply.get("score", 0.0))
        except (TypeError, ValueError):
            value = 0.0
        # float() accepts NaN, and max(0.0, min(1.0, nan)) evaluates to 1.0,
        # which would turn a non-numeric score into full confidence.
        if math.isnan(value):
            value = 0.0
        scores.append(max(0.0, min(1.0, value)))

    votes = len(keepers)
    score = round(sum(scores) / votes, 4) if votes else 0.0
    return {"votes": votes, "score": score, "verdict": classify(votes, score),
            "voters": sorted(keepers), "per_judge": raw}
async def _run_three(system: str, user: str, max_tokens: int) -> dict[str, object]:
async with httpx.AsyncClient() as client:
import asyncio