feat(supervisor): read real claude.ai usage % from OAuth endpoint for quota gating

The supervisor's quota check used a tiny `claude -p` probe to decide whether the claude.ai subscription had room. That works but is indirect (an Opus-adjacent round trip) and only answers yes/no. Anthropic exposes the actual utilization — the same 5-hour / weekly / weekly-Opus percentages the Claude Code status bar shows — via the (undocumented) GET /api/oauth/usage endpoint.

- subscription_usage(): reads the OAuth token from ~/.claude/.credentials.json and GETs /api/oauth/usage with the required `claude-code/*` User-Agent (without it the request hits an aggressively rate-limited bucket and 429s). Returns the parsed {five_hour, seven_day, seven_day_opus, ...} or None on any failure.
- quota_available(): now prefers the endpoint — a drain run resumes only when the 5-hour, weekly, AND weekly-Opus windows are all <100% (the extractor runs Opus). More precise than the probe and sees every limit the way the UI does. Falls back to the `claude -p` probe when the endpoint is unreachable (it's undocumented and may change).
- `status` subcommand now prints the live percentages + reset times.

Note: this is the data/logic layer only. Surfacing the % on the /operations page is a visual UI change and must go through the Claude Design gate first (web-ui/AGENTS.md) — deferred.

Invariants: G1 (resume decision driven by the authoritative usage state).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 10:19:17 +00:00
parent 8d13e26cc8
commit 9e46db3c48
2 changed files with 74 additions and 8 deletions
--- a/scripts/halacha_drain_supervisor.py
+++ b/scripts/halacha_drain_supervisor.py
@@ -39,10 +39,19 @@ import json
 import os
 import re
 import subprocess
+import urllib.request
 from datetime import datetime, timedelta, timezone
 from glob import glob

 REPO = "/home/chaim/legal-ai"
+# claude.ai subscription usage — the same 5-hour / 7-day utilization the Claude
+# Code status bar shows, via the (undocumented) OAuth usage endpoint. The token
+# lives in the CLI's own credentials file; the claude-code User-Agent is
+# REQUIRED — without it the request lands in an aggressively rate-limited bucket
+# and 429s. Unofficial endpoint: may change, so callers must tolerate None.
+CLAUDE_CRED_PATH = "/home/chaim/.claude/.credentials.json"
+OAUTH_USAGE_URL = "https://api.anthropic.com/api/oauth/usage"
+_USAGE_UA = "claude-code/2.1.177"
 RUNTIME_DIR = "/home/chaim/halacha-drain-monitor"   # state (outside repo)
 STATE = os.path.join(RUNTIME_DIR, "state.json")
 DRAIN = "legal-halacha-drain"
@@ -89,14 +98,53 @@ CLAUDE = claude_bin()
 _ENV = {**os.environ, "HOME": "/home/chaim"}


-def quota_available() -> bool:
-    """Cheap live probe: is the claude.ai quota actually usable right now?
+def subscription_usage() -> dict | None:
+    """Read the claude.ai subscription usage — the exact 5-hour / 7-day
+    utilization the Claude Code UI shows — from the OAuth usage endpoint.

-    The 429 reset time claude.ai reports is often conservative — quota frees up
-    earlier. Rather than trust that timestamp and wait blindly, we re-probe with
-    a tiny `claude -p` call and resume the moment it succeeds. Conservative on
-    failure: any non-zero exit, timeout, or limit message → treat as still
-    limited (so a flaky probe never resumes the drain into a real 429)."""
+    Returns the parsed JSON (keys: five_hour, seven_day, seven_day_opus,
+    seven_day_sonnet, extra_usage; each window → {utilization 0-100, resets_at})
+    or None on ANY failure. Undocumented endpoint — every caller must tolerate
+    None and fall back."""
+    try:
+        with open(CLAUDE_CRED_PATH) as f:
+            token = json.load(f)["claudeAiOauth"]["accessToken"]
+    except Exception:
+        return None
+    req = urllib.request.Request(OAUTH_USAGE_URL, headers={
+        "Authorization": f"Bearer {token}",
+        "User-Agent": _USAGE_UA,            # required — else aggressive 429
+        "anthropic-beta": "oauth-2025-04-20",
+    })
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    except Exception:
+        return None
+
+
+def quota_available() -> bool:
+    """Is the claude.ai quota actually usable right now?
+
+    Primary: read the authoritative utilization from the OAuth usage endpoint
+    (subscription_usage) and treat a window as exhausted only at >=100%. Cheaper
+    and more precise than a probe — no Opus call, and it sees every limit
+    (5-hour, weekly all-models, weekly-Opus) the way the UI does. The 429 reset
+    time claude.ai reports is often conservative, so this resumes the drain the
+    moment a window actually frees up rather than waiting blindly.
+
+    Fallback (endpoint unreachable — it is undocumented): a tiny `claude -p`
+    probe via the official CLI. Conservative on failure: any non-zero exit,
+    timeout, or limit message → treat as still limited."""
+    usage = subscription_usage()
+    if usage is not None:
+        # A drain run needs the 5-hour window, the weekly all-models cap, AND
+        # the weekly-Opus cap (the extractor runs Opus) all below 100%.
+        windows = ("five_hour", "seven_day", "seven_day_opus")
+        utils = [(usage.get(w) or {}).get("utilization") for w in windows]
+        # utilization may be None (window inactive / no data) → treat as 0%.
+        return all((u or 0) < 100 for u in utils)
+    # ── fallback: official-CLI probe ──
    try:
        r = subprocess.run([CLAUDE, "-p", "Reply with exactly: OK"],
                           capture_output=True, text=True, timeout=60, env=_ENV,
@@ -466,6 +514,24 @@ def cmd_status():
        print(f"תור: pending={st.get('pending')} processing={st.get('processing')} "
              f"done={st.get('done')} | staging halachot={st.get('halachot_total')}")
    print(f"דריינר pm2: {pm2_status()}")
+    usage = subscription_usage()
+    if usage:
+        def _w(key):
+            w = usage.get(key) or {}
+            u = w.get("utilization")
+            if u is None:
+                return "—"
+            r = w.get("resets_at")
+            try:
+                rt = f" (איפוס {datetime.fromisoformat(r).astimezone(IDT):%H:%M}" if r else ""
+                rt += ")" if r else ""
+            except Exception:
+                rt = ""
+            return f"{u:.0f}%{rt}"
+        print(f"מכסת claude.ai: 5-שעות={_w('five_hour')} · שבועי={_w('seven_day')} · "
+              f"שבועי-Opus={_w('seven_day_opus')}")
+    else:
+        print("מכסת claude.ai: (endpoint לא זמין)")


 def main():