feat(halacha): ספי-עצירה-רכים לדריינר — 5-שעות 75% / שבועי 65% (עצירה לפני 429) (#259)

Co-authored-by: Chaim <chaim@marcus-law.co.il>
Co-committed-by: Chaim <chaim@marcus-law.co.il>
2026-06-15 03:18:56 +00:00
parent 76a29756c5
commit 1094ac9967
2 changed files with 50 additions and 21 deletions
--- a/scripts/halacha_drain_supervisor.py
+++ b/scripts/halacha_drain_supervisor.py
@@ -77,6 +77,31 @@ NIGHT_START, NIGHT_END = 23, 5       # the drain's normal window (IDT hours)
 CATCHUP_END = 7        # soft window end (IDT) for early-morning catch-up — see fix B


+def _env_int(name: str, default: int) -> int:
+    try:
+        return int(os.environ.get(name, default))
+    except (TypeError, ValueError):
+        return default
+
+
+# Soft utilization ceilings — stop the drain BEFORE a window actually exhausts
+# (429s). Hitting a 429 mid-case forces re-extraction of an already-completed
+# case under the rate limit, DEGRADING it; stopping at the chair's ceilings instead
+# lets the in-flight halacha case finish cleanly and the drain idle until the
+# window resets. Reaching a ceiling is treated EXACTLY like 100% exhaustion
+# (cooldown until that window's resets_at). Per the chair (2026-06-15): the 5-hour
+# ("hourly session") window stops at 75%, the weekly windows at 65%. The keys of
+# USAGE_CEILINGS are the same windows that quota_available / quota_exhausted gate
+# on; both ceilings are overridable via env for ops tuning without a redeploy.
+CEILING_FIVE_HOUR = _env_int("HALACHA_DRAIN_CEILING_5H", 75)
+CEILING_WEEKLY = _env_int("HALACHA_DRAIN_CEILING_WEEKLY", 65)
+USAGE_CEILINGS = {
+    "five_hour": CEILING_FIVE_HOUR,
+    "seven_day": CEILING_WEEKLY,
+    "seven_day_sonnet": CEILING_WEEKLY,
+}
+
+
 def _now_utc():
    return datetime.now(timezone.utc)

@@ -139,11 +164,12 @@ def quota_available() -> bool:
    """Is the claude.ai quota actually usable right now?

    Primary: read the authoritative utilization from the OAuth usage endpoint
-    (subscription_usage) and treat a window as exhausted only at >=100%. Cheaper
-    and more precise than a probe — no Opus call, and it sees every limit
-    (5-hour, weekly all-models, weekly-Sonnet) the way the UI does. The 429 reset
-    time claude.ai reports is often conservative, so this resumes the drain the
-    moment a window actually frees up rather than waiting blindly.
+    (subscription_usage) and treat a window as exhausted at its USAGE_CEILINGS
+    ceiling (the chair's soft stop-before-429 thresholds, NOT 100%). Cheaper and
+    more precise than a probe — no Opus call, and it sees every limit (5-hour,
+    weekly all-models, weekly-Sonnet) the way the UI does. The 429 reset time
+    claude.ai reports is often conservative, so this resumes the drain the moment
+    a window actually frees back under its ceiling rather than waiting blindly.

    Fallback (endpoint unreachable — it is undocumented): a tiny `claude -p`
    probe via the official CLI. Conservative on failure: any non-zero exit,
@@ -151,14 +177,13 @@ def quota_available() -> bool:
    usage = subscription_usage()
    if usage is not None:
        # A drain run needs the 5-hour window, the weekly all-models cap, AND
-        # the weekly per-model cap all below 100%. On this account the per-model
-        # cap that's actually populated is Sonnet (seven_day_opus is null — no
-        # separate Opus cap); the all-models seven_day cap is the backstop for
-        # Opus usage either way. null utilization → treated as 0% (not limiting).
-        windows = ("five_hour", "seven_day", "seven_day_sonnet")
-        utils = [(usage.get(w) or {}).get("utilization") for w in windows]
+        # the weekly per-model cap all below their ceilings. On this account the
+        # per-model cap that's actually populated is Sonnet (seven_day_opus is
+        # null — no separate Opus cap); the all-models seven_day cap is the
+        # backstop for Opus usage either way. null utilization → treated as 0%.
+        utils = {w: (usage.get(w) or {}).get("utilization") for w in USAGE_CEILINGS}
        # utilization may be None (window inactive / no data) → treat as 0%.
-        return all((u or 0) < 100 for u in utils)
+        return all((u or 0) < USAGE_CEILINGS[w] for w, u in utils.items())
    # ── fallback: official-CLI probe ──
    try:
        r = subprocess.run([CLAUDE, "-p", "Reply with exactly: OK"],
@@ -184,15 +209,16 @@ def quota_exhausted():

    Returns (exhausted: bool, earliest_reset_utc: datetime|None), or None when the
    endpoint is unreachable (caller falls back to the log scrape). A window counts
-    as exhausting the drain at >=100% utilization — same windows quota_available
-    gates on (5-hour, weekly all-models, weekly-Sonnet)."""
+    as exhausting the drain at >= its USAGE_CEILINGS ceiling (the chair's soft
+    stop-before-429 thresholds) — same windows quota_available gates on (5-hour,
+    weekly all-models, weekly-Sonnet)."""
    usage = subscription_usage()
    if usage is None:
        return None
    exhausted, resets = False, []
-    for w in ("five_hour", "seven_day", "seven_day_sonnet"):
+    for w, ceiling in USAGE_CEILINGS.items():
        info = usage.get(w) or {}
-        if (info.get("utilization") or 0) >= 100:
+        if (info.get("utilization") or 0) >= ceiling:
            exhausted = True
            r = info.get("resets_at")
            if r:
@@ -606,17 +632,20 @@ def cmd_status():
        def _w(key):
            w = usage.get(key) or {}
            u = w.get("utilization")
+            cap = USAGE_CEILINGS.get(key)
+            capf = f"/{cap}%" if cap is not None else ""
            if u is None:
-                return "—"
+                return f"—{capf}"
            r = w.get("resets_at")
            try:
                rt = f" (איפוס {datetime.fromisoformat(r).astimezone(IDT):%H:%M}" if r else ""
                rt += ")" if r else ""
            except Exception:
                rt = ""
-            return f"{u:.0f}%{rt}"
-        print(f"מכסת claude.ai: 5-שעות={_w('five_hour')} · שבועי={_w('seven_day')} · "
-              f"שבועי-Sonnet={_w('seven_day_sonnet')}")
+            hit = " ⛔" if (u or 0) >= (cap or 100) else ""
+            return f"{u:.0f}%{capf}{rt}{hit}"
+        print(f"מכסת claude.ai (ניצול/סף-עצירה): 5-שעות={_w('five_hour')} · "
+              f"שבועי={_w('seven_day')} · שבועי-Sonnet={_w('seven_day_sonnet')}")
    else:
        print("מכסת claude.ai: (endpoint לא זמין)")