feat(halacha): עצירה-רכה של הדריינר בסף-ניצול (75/65) + מקור-אמת יחיד למכסה (#265)
Co-authored-by: Chaim <chaim@marcus-law.co.il> Co-committed-by: Chaim <chaim@marcus-law.co.il>
This commit was merged in pull request #265.
This commit is contained in:
@@ -49,19 +49,12 @@ import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from glob import glob
|
||||
|
||||
REPO = "/home/chaim/legal-ai"
|
||||
# claude.ai subscription usage — the same 5-hour / 7-day utilization the Claude
|
||||
# Code status bar shows, via the (undocumented) OAuth usage endpoint. The token
|
||||
# lives in the CLI's own credentials file; the claude-code User-Agent is
|
||||
# REQUIRED — without it the request lands in an aggressively rate-limited bucket
|
||||
# and 429s. Unofficial endpoint: may change, so callers must tolerate None.
|
||||
CLAUDE_CRED_PATH = "/home/chaim/.claude/.credentials.json"
|
||||
OAUTH_USAGE_URL = "https://api.anthropic.com/api/oauth/usage"
|
||||
_USAGE_UA = "claude-code/2.1.177"
|
||||
RUNTIME_DIR = "/home/chaim/halacha-drain-monitor" # state (outside repo)
|
||||
STATE = os.path.join(RUNTIME_DIR, "state.json")
|
||||
DRAIN = "legal-halacha-drain"
|
||||
@@ -77,29 +70,17 @@ NIGHT_START, NIGHT_END = 23, 5 # the drain's normal window (IDT hours)
|
||||
CATCHUP_END = 7 # soft window end (IDT) for early-morning catch-up — see fix B
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    When the variable is unset, *default* itself is passed through ``int()``.
    If the conversion raises ``TypeError`` or ``ValueError`` (for example an
    empty or non-numeric value), *default* is returned unchanged.
    """
    raw = os.environ.get(name, default)
    try:
        parsed = int(raw)
    except (TypeError, ValueError):
        # Unparseable override: keep the built-in default instead of failing.
        return default
    return parsed
|
||||
|
||||
|
||||
# Soft utilization ceilings — stop the drain BEFORE a window actually exhausts
|
||||
# (429s). Hitting a 429 mid-case forces re-extraction of an already-completed
|
||||
# case under the rate limit, DEGRADING it; stopping at the chair's ceilings instead
|
||||
# lets the in-flight halacha case finish cleanly and the drain idle until the
|
||||
# window resets. Reaching a ceiling is treated EXACTLY like 100% exhaustion
|
||||
# (cooldown until that window's resets_at). Per the chair (2026-06-15): the 5-hour
|
||||
# ("hourly session") window stops at 75%, the weekly windows at 65%. Both keys map
|
||||
# to the same windows quota_available / quota_exhausted gate on; overridable via
|
||||
# env for ops tuning without a redeploy.
|
||||
CEILING_FIVE_HOUR = _env_int("HALACHA_DRAIN_CEILING_5H", 75)
|
||||
CEILING_WEEKLY = _env_int("HALACHA_DRAIN_CEILING_WEEKLY", 65)
|
||||
USAGE_CEILINGS = {
|
||||
"five_hour": CEILING_FIVE_HOUR,
|
||||
"seven_day": CEILING_WEEKLY,
|
||||
"seven_day_sonnet": CEILING_WEEKLY,
|
||||
}
|
||||
# Usage-ceiling logic (subscription_usage / ceiling_status / USAGE_CEILINGS) is
|
||||
# the SINGLE source of truth, shared with the drain — see
|
||||
# legal_mcp/services/usage_limits.py. The supervisor runs as system python3, so
|
||||
# put the (stdlib-only) package on the path before importing it. Resolve relative
|
||||
# to THIS file (same as drain_halacha_queue.py) so the module is loaded from the
|
||||
# same checkout the script lives in.
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||
"..", "mcp-server", "src"))
|
||||
from legal_mcp.services.usage_limits import ( # noqa: E402
|
||||
USAGE_CEILINGS, ceiling_status, subscription_usage,
|
||||
)
|
||||
|
||||
|
||||
def _now_utc():
|
||||
@@ -135,31 +116,6 @@ CLAUDE = claude_bin()
|
||||
_ENV = {**os.environ, "HOME": "/home/chaim"}
|
||||
|
||||
|
||||
def subscription_usage() -> dict | None:
    """Fetch the claude.ai subscription usage from the OAuth usage endpoint.

    The bearer token is read from the credentials file at CLAUDE_CRED_PATH
    (``claudeAiOauth.accessToken``) and sent to OAUTH_USAGE_URL together with
    the claude-code User-Agent and the OAuth beta header.

    Returns:
        The parsed JSON object, or None on ANY failure: unreadable or malformed
        credentials, a missing or non-string token, a network/HTTP error, a
        timeout, invalid JSON, or a payload that is not a JSON object.
        Expected keys: five_hour, seven_day, seven_day_opus, seven_day_sonnet,
        extra_usage; each window maps to {utilization 0-100, resets_at}.

    The endpoint is undocumented and may change, so every caller must tolerate
    None and fall back.
    """
    try:
        # JSON is UTF-8 by definition; do not depend on the process locale.
        with open(CLAUDE_CRED_PATH, encoding="utf-8") as f:
            token = json.load(f)["claudeAiOauth"]["accessToken"]
    except Exception:
        return None
    # A null/empty token would otherwise be sent as "Bearer None" and waste a
    # request against an aggressively rate-limited endpoint.
    if not isinstance(token, str) or not token:
        return None
    req = urllib.request.Request(OAUTH_USAGE_URL, headers={
        "Authorization": f"Bearer {token}",
        "User-Agent": _USAGE_UA,  # required — else aggressive 429
        "anthropic-beta": "oauth-2025-04-20",
    })
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except Exception:
        return None
    # Callers index the result with .get(); anything but a JSON object (list,
    # string, null, ...) is handled as a failure rather than crashing them.
    return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def quota_available() -> bool:
|
||||
"""Is the claude.ai quota actually usable right now?
|
||||
|
||||
@@ -176,14 +132,10 @@ def quota_available() -> bool:
|
||||
timeout, or limit message → treat as still limited."""
|
||||
usage = subscription_usage()
|
||||
if usage is not None:
|
||||
# A drain run needs the 5-hour window, the weekly all-models cap, AND
|
||||
# the weekly per-model cap all below their ceilings. On this account the
|
||||
# per-model cap that's actually populated is Sonnet (seven_day_opus is
|
||||
# null — no separate Opus cap); the all-models seven_day cap is the
|
||||
# backstop for Opus usage either way. null utilization → treated as 0%.
|
||||
utils = {w: (usage.get(w) or {}).get("utilization") for w in USAGE_CEILINGS}
|
||||
# utilization may be None (window inactive / no data) → treat as 0%.
|
||||
return all((u or 0) < USAGE_CEILINGS[w] for w, u in utils.items())
|
||||
# All gated windows (5-hour, weekly all-models, weekly-Sonnet) must be
|
||||
# below their ceilings — same evaluation the drain uses (ceiling_status).
|
||||
over, _, _ = ceiling_status(usage)
|
||||
return not over
|
||||
# ── fallback: official-CLI probe ──
|
||||
try:
|
||||
r = subprocess.run([CLAUDE, "-p", "Reply with exactly: OK"],
|
||||
@@ -210,23 +162,13 @@ def quota_exhausted():
|
||||
Returns (exhausted: bool, earliest_reset_utc: datetime|None), or None when the
|
||||
endpoint is unreachable (caller falls back to the log scrape). A window counts
|
||||
as exhausting the drain at >= its USAGE_CEILINGS ceiling (the chair's soft
|
||||
stop-before-429 thresholds) — same windows quota_available gates on (5-hour,
|
||||
weekly all-models, weekly-Sonnet)."""
|
||||
stop-before-429 thresholds) — the SAME evaluation the drain gates on
|
||||
(ceiling_status)."""
|
||||
usage = subscription_usage()
|
||||
if usage is None:
|
||||
return None
|
||||
exhausted, resets = False, []
|
||||
for w, ceiling in USAGE_CEILINGS.items():
|
||||
info = usage.get(w) or {}
|
||||
if (info.get("utilization") or 0) >= ceiling:
|
||||
exhausted = True
|
||||
r = info.get("resets_at")
|
||||
if r:
|
||||
try:
|
||||
resets.append(datetime.fromisoformat(r).astimezone(timezone.utc))
|
||||
except Exception:
|
||||
pass
|
||||
return exhausted, (min(resets) if resets else None)
|
||||
over, reset, _ = ceiling_status(usage)
|
||||
return over, reset
|
||||
|
||||
|
||||
# ── DB access (via the repo venv; the module self-configures) ────────────────
|
||||
@@ -544,15 +486,24 @@ def tick():
|
||||
notes.append("התור ריק — אין מה לחלץ.")
|
||||
elif in_cooldown:
|
||||
mode = "weekly_exhausted" if weekly else "ratelimited"
|
||||
# Stop a running drain while limited — otherwise it keeps spawning Opus
|
||||
# calls that 429 on every chunk, burning the very quota we're waiting on
|
||||
# (and burying the 429 signal under teardown noise). It re-ignites via the
|
||||
# normal trigger path once cooldown clears.
|
||||
if status == "online":
|
||||
# Two cooldown causes, two responses:
|
||||
# • FRESH real 429 (log_rl) — the drain is literally failing on every
|
||||
# chunk, burning the quota we're waiting on and re-extracting/degrading
|
||||
# completed cases. HARD-KILL it (pm2 stop); re-ignites once cooldown
|
||||
# clears.
|
||||
# • SOFT ceiling (≥75%/65% with no fresh 429) — there's still quota
|
||||
# headroom, so DON'T kill: the drain reads the SAME ceiling and stops
|
||||
# itself between cases, finishing the in-flight case cleanly. We just
|
||||
# hold (never re-trigger while in_cooldown).
|
||||
if status == "online" and log_rl:
|
||||
stop_drain()
|
||||
action = "hold-stopped"
|
||||
notes.append(f"rate-limit פעיל — הדריינר נעצר כדי לא לבזבז מכסה על 429; "
|
||||
notes.append(f"429 טרי — הדריינר נקטל כדי לא להלום rate-limit; "
|
||||
f"איפוס ~{cd_dt.astimezone(IDT):%H:%M IDT}.")
|
||||
elif status == "online":
|
||||
action = "hold-soft"
|
||||
notes.append(f"סף-ניצול נחצה (אין 429 טרי) — הדריינר יסיים את התיק "
|
||||
f"הנוכחי ויעצור לבד; איפוס ~{cd_dt.astimezone(IDT):%H:%M IDT}.")
|
||||
else:
|
||||
action = "hold"
|
||||
notes.append(f"rate-limit פעיל; איפוס ~{cd_dt.astimezone(IDT):%H:%M IDT}.")
|
||||
|
||||
Reference in New Issue
Block a user