feat(halacha): ספי-עצירה-רכים לדריינר — 5-שעות 75% / שבועי 65% (עצירה לפני 429) (#259)
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 9s
G12 Leak-Guard / leak-guard (push) Successful in 4s
Lint — undefined names / undefined-names (push) Successful in 10s

Co-authored-by: Chaim <chaim@marcus-law.co.il>
Co-committed-by: Chaim <chaim@marcus-law.co.il>
This commit was merged in pull request #259.
This commit is contained in:
2026-06-15 03:18:56 +00:00
committed by chaim
parent 76a29756c5
commit 1094ac9967
2 changed files with 50 additions and 21 deletions

View File

@@ -77,6 +77,31 @@ NIGHT_START, NIGHT_END = 23, 5 # the drain's normal window (IDT hours)
CATCHUP_END = 7 # soft window end (IDT) for early-morning catch-up — see fix B
def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the process environment.

    Looks up ``name`` in ``os.environ`` (using ``default`` when the variable
    is unset) and converts the result with ``int()``. If the conversion
    raises ``TypeError`` or ``ValueError`` — e.g. the variable is empty or
    not a base-10 integer — ``default`` is returned unchanged.
    """
    raw = os.environ.get(name, default)
    try:
        parsed = int(raw)
    except (TypeError, ValueError):
        # Unparseable override: ignore it and keep the built-in default.
        return default
    return parsed
# Soft utilization ceilings: the drain stops BEFORE a quota window is actually
# exhausted (i.e. before 429s start). A 429 in the middle of a case forces
# re-extraction of an already-completed case under the rate limit, which
# degrades it; stopping at the chair's ceilings lets the in-flight halacha case
# finish cleanly, after which the drain idles until the window resets.
# Reaching a ceiling is handled exactly like 100% exhaustion (cooldown until
# that window's resets_at).
#
# Per the chair (2026-06-15): the 5-hour ("hourly session") window stops at
# 75%, the weekly windows at 65%. All three keys below are the same windows
# that quota_available / quota_exhausted gate on. Both percentages can be
# overridden through the environment for ops tuning without a redeploy.
CEILING_FIVE_HOUR = _env_int("HALACHA_DRAIN_CEILING_5H", 75)
CEILING_WEEKLY = _env_int("HALACHA_DRAIN_CEILING_WEEKLY", 65)

# window key -> ceiling in percent; the two weekly windows share one ceiling.
USAGE_CEILINGS = dict(
    five_hour=CEILING_FIVE_HOUR,
    seven_day=CEILING_WEEKLY,
    seven_day_sonnet=CEILING_WEEKLY,
)
def _now_utc():
    """Return the current moment as a timezone-aware datetime in UTC."""
    current = datetime.now(tz=timezone.utc)
    return current
@@ -139,11 +164,12 @@ def quota_available() -> bool:
"""Is the claude.ai quota actually usable right now?
Primary: read the authoritative utilization from the OAuth usage endpoint
(subscription_usage) and treat a window as exhausted only at >=100%. Cheaper
and more precise than a probe — no Opus call, and it sees every limit
(5-hour, weekly all-models, weekly-Sonnet) the way the UI does. The 429 reset
time claude.ai reports is often conservative, so this resumes the drain the
moment a window actually frees up rather than waiting blindly.
(subscription_usage) and treat a window as exhausted at its USAGE_CEILINGS
ceiling (the chair's soft stop-before-429 thresholds, NOT 100%). Cheaper and
more precise than a probe — no Opus call, and it sees every limit (5-hour,
weekly all-models, weekly-Sonnet) the way the UI does. The 429 reset time
claude.ai reports is often conservative, so this resumes the drain the moment
a window actually frees back under its ceiling rather than waiting blindly.
Fallback (endpoint unreachable — it is undocumented): a tiny `claude -p`
probe via the official CLI. Conservative on failure: any non-zero exit,
@@ -151,14 +177,13 @@ def quota_available() -> bool:
usage = subscription_usage()
if usage is not None:
# A drain run needs the 5-hour window, the weekly all-models cap, AND
# the weekly per-model cap all below 100%. On this account the per-model
# cap that's actually populated is Sonnet (seven_day_opus is null — no
# separate Opus cap); the all-models seven_day cap is the backstop for
# Opus usage either way. null utilization → treated as 0% (not limiting).
windows = ("five_hour", "seven_day", "seven_day_sonnet")
utils = [(usage.get(w) or {}).get("utilization") for w in windows]
# the weekly per-model cap all below their ceilings. On this account the
# per-model cap that's actually populated is Sonnet (seven_day_opus is
# null — no separate Opus cap); the all-models seven_day cap is the
# backstop for Opus usage either way. null utilization → treated as 0%.
utils = {w: (usage.get(w) or {}).get("utilization") for w in USAGE_CEILINGS}
# utilization may be None (window inactive / no data) → treat as 0%.
return all((u or 0) < 100 for u in utils)
return all((u or 0) < USAGE_CEILINGS[w] for w, u in utils.items())
# ── fallback: official-CLI probe ──
try:
r = subprocess.run([CLAUDE, "-p", "Reply with exactly: OK"],
@@ -184,15 +209,16 @@ def quota_exhausted():
Returns (exhausted: bool, earliest_reset_utc: datetime|None), or None when the
endpoint is unreachable (caller falls back to the log scrape). A window counts
as exhausting the drain at >=100% utilization — same windows quota_available
gates on (5-hour, weekly all-models, weekly-Sonnet)."""
as exhausting the drain at >= its USAGE_CEILINGS ceiling (the chair's soft
stop-before-429 thresholds) — same windows quota_available gates on (5-hour,
weekly all-models, weekly-Sonnet)."""
usage = subscription_usage()
if usage is None:
return None
exhausted, resets = False, []
for w in ("five_hour", "seven_day", "seven_day_sonnet"):
for w, ceiling in USAGE_CEILINGS.items():
info = usage.get(w) or {}
if (info.get("utilization") or 0) >= 100:
if (info.get("utilization") or 0) >= ceiling:
exhausted = True
r = info.get("resets_at")
if r:
@@ -606,17 +632,20 @@ def cmd_status():
def _w(key):
w = usage.get(key) or {}
u = w.get("utilization")
cap = USAGE_CEILINGS.get(key)
capf = f"/{cap}%" if cap is not None else ""
if u is None:
return ""
return f"{capf}"
r = w.get("resets_at")
try:
rt = f" (איפוס {datetime.fromisoformat(r).astimezone(IDT):%H:%M}" if r else ""
rt += ")" if r else ""
except Exception:
rt = ""
return f"{u:.0f}%{rt}"
print(f"מכסת claude.ai: 5-שעות={_w('five_hour')} · שבועי={_w('seven_day')} · "
f"שבועי-Sonnet={_w('seven_day_sonnet')}")
hit = "" if (u or 0) >= (cap or 100) else ""
return f"{u:.0f}%{capf}{rt}{hit}"
print(f"מכסת claude.ai (ניצול/סף-עצירה): 5-שעות={_w('five_hour')} · "
f"שבועי={_w('seven_day')} · שבועי-Sonnet={_w('seven_day_sonnet')}")
else:
print("מכסת claude.ai: (endpoint לא זמין)")