feat(halacha): עצירה-רכה של הדריינר בסף-ניצול + מקור-אמת יחיד למכסה
בהגיע סף-רך (5-שעות ≥75% / שבועי ≥65%) הדריינר מסיים את התיק שרץ ועוצר לבד בין תיקים (===STOP===) — במקום שהסופרוויזר יקטול אותו ב-pm2 stop באמצע חילוץ. ב-75% יש מכסה לסיים את התיק; הקטילה נשמרת רק ל-429 טרי אמיתי. - חדש legal_mcp/services/usage_limits.py (stdlib-only): מקור-אמת יחיד — subscription_usage / USAGE_CEILINGS / ceiling_status. מיובא גם מ-system-python (supervisor) וגם מה-venv (drain). __init__ ריקים → import בטוח מחוץ ל-venv. - supervisor: מייבא מהמודול (הסרת ההעתקים המקומיים, ~50 שורות פחות); quota_exhausted/quota_available הפכו wrappers דקים; ענף cooldown — קטילה (hold-stopped) רק אם log_rl (429 טרי), אחרת hold-soft בלי pm2 stop. - drain: limit=4→1 (בדיקת-סף בין כל תיק); שער-סף ב-run_in_executor, fail-OPEN כש-endpoint None (הסופרוויזר מגבה ב-429-kill); שמירת קצב 30ש' בין תיקים (pl.INTER_PRECEDENT_COOLDOWN_SEC — limit=1 ביטל את המרווח הפנימי-לסבב). - SCRIPTS.md עודכן (limit=1, שער-סף, hold-soft, מקור-אמת משותף). אומת end-to-end (endpoint חי): (1) drain עם סף מורד → ===STOP=== usage ceiling בלי לעבד תיק; (2) supervisor status=online+סף-רך → action=hold-soft, stop_drain נקרא 0; (3) 429 טרי → hold-stopped, stop_drain נקרא 1. py_compile עובר. court_fetch_service/usage_status (העתק שלישי, async/aiohttp, רגיש-דיפלוי) נדחה במכוון לאיחוד-עתידי — לא נוגעים בגשר-המארח כאן. Invariants: G1 (נרמול-במקור — endpoint יחיד), G2 (אין מסלול-בקרה מקביל — מודול משותף יחיד, drain+supervisor קוראים אותו דבר), X16 (עמידות — עצירה לפני 429 מונעת חילוץ-מחדש משחית). G12 לא רלוונטי. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
103
mcp-server/src/legal_mcp/services/usage_limits.py
Normal file
103
mcp-server/src/legal_mcp/services/usage_limits.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""claude.ai subscription-usage ceilings — the single source of truth.
|
||||
|
||||
ONE place that reads the (undocumented) OAuth usage endpoint and decides whether
|
||||
a usage window has crossed its soft stop-before-429 ceiling. Imported by BOTH the
|
||||
halacha drain (`scripts/drain_halacha_queue.py`) and its supervisor
|
||||
(`scripts/halacha_drain_supervisor.py`) so the two never drift (G1/G2).
|
||||
|
||||
STRICTLY stdlib — no asyncpg / aiohttp / config imports. The supervisor runs as
|
||||
plain system ``python3`` and imports this module directly; pulling in heavy deps
|
||||
here would break that import. (``legal_mcp/__init__`` and ``services/__init__``
|
||||
are intentionally empty, which is what makes the system-python import work.)
|
||||
|
||||
Soft ceilings (chair, 2026-06-15): stop the drain BEFORE a window exhausts so the
|
||||
in-flight case finishes on the remaining quota and the drain idles until reset,
|
||||
instead of hammering 429 (which burns retries and leaves cases half-extracted).
|
||||
5-hour ("hourly session") window stops at 75%, the weekly windows at 65%.
|
||||
Overridable via env for ops tuning without a redeploy.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Where the claude.ai subscription-usage read gets its inputs.
#
# * The OAuth token is taken from the Claude CLI's own credentials file.
# * The claude-code User-Agent is REQUIRED: without it the request falls into
#   an aggressively rate-limited bucket and comes back 429.
# * The endpoint is unofficial and may change at any time, so every caller
#   must tolerate a None result and fall back.
CLAUDE_CRED_PATH = "/home/chaim/.claude/.credentials.json"
OAUTH_USAGE_URL = "https://api.anthropic.com/api/oauth/usage"
USAGE_UA = "claude-code/2.1.177"
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int, else *default*.

    *default* is returned when the variable is unset and also when its value
    cannot be converted by ``int()`` (empty string, non-numeric text, ...).
    """
    try:
        # Lookup stays inside the try so a TypeError from either the lookup
        # or the conversion falls back to the default.
        raw = os.environ.get(name, default)
        return int(raw)
    except (TypeError, ValueError):
        pass
    return default
|
||||
|
||||
|
||||
# Soft ceilings, in percent utilization, per usage window.
#
# Reaching a ceiling is treated EXACTLY like 100% exhaustion: cooldown until
# that window's resets_at. Both weekly keys share a single threshold. The
# per-model cap actually populated on this account is Sonnet (seven_day_opus is
# null), and the all-models seven_day cap is the backstop for Opus usage
# either way.
CEILING_FIVE_HOUR = _env_int("HALACHA_DRAIN_CEILING_5H", 75)
CEILING_WEEKLY = _env_int("HALACHA_DRAIN_CEILING_WEEKLY", 65)

# Window key (as it appears in the usage payload) -> ceiling percentage.
USAGE_CEILINGS = {
    "five_hour": CEILING_FIVE_HOUR,
    **dict.fromkeys(("seven_day", "seven_day_sonnet"), CEILING_WEEKLY),
}
|
||||
|
||||
|
||||
def subscription_usage() -> dict | None:
|
||||
"""Read the claude.ai subscription usage — the exact 5-hour / 7-day
|
||||
utilization the Claude Code UI shows — from the OAuth usage endpoint.
|
||||
|
||||
Returns the parsed JSON (keys: five_hour, seven_day, seven_day_opus,
|
||||
seven_day_sonnet, extra_usage; each window → {utilization 0-100, resets_at})
|
||||
or None on ANY failure. Undocumented endpoint — every caller must tolerate
|
||||
None and fall back."""
|
||||
try:
|
||||
with open(CLAUDE_CRED_PATH) as f:
|
||||
token = json.load(f)["claudeAiOauth"]["accessToken"]
|
||||
except Exception:
|
||||
return None
|
||||
req = urllib.request.Request(OAUTH_USAGE_URL, headers={
|
||||
"Authorization": f"Bearer {token}",
|
||||
"User-Agent": USAGE_UA, # required — else aggressive 429
|
||||
"anthropic-beta": "oauth-2025-04-20",
|
||||
})
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def ceiling_status(usage: dict) -> tuple[bool, datetime | None, str]:
|
||||
"""Evaluate an already-fetched usage dict against USAGE_CEILINGS.
|
||||
|
||||
Returns (over, earliest_reset_utc, detail):
|
||||
• over — True iff ANY gated window is at/above its ceiling
|
||||
• earliest_reset — soonest resets_at among the windows that are over (UTC),
|
||||
or None
|
||||
• detail — short log string, e.g. "5h=78%/75 weekly=40%/65"
|
||||
|
||||
Takes the usage dict as a parameter (does NOT fetch) so the caller owns the
|
||||
single network read. null utilization → treated as 0% (window inactive)."""
|
||||
over, resets, parts = False, [], []
|
||||
label = {"five_hour": "5h", "seven_day": "weekly", "seven_day_sonnet": "weekly-sonnet"}
|
||||
for w, ceiling in USAGE_CEILINGS.items():
|
||||
info = usage.get(w) or {}
|
||||
util = info.get("utilization") or 0
|
||||
parts.append(f"{label.get(w, w)}={util:.0f}%/{ceiling}")
|
||||
if util >= ceiling:
|
||||
over = True
|
||||
r = info.get("resets_at")
|
||||
if r:
|
||||
try:
|
||||
resets.append(datetime.fromisoformat(r).astimezone(timezone.utc))
|
||||
except Exception:
|
||||
pass
|
||||
return over, (min(resets) if resets else None), " ".join(parts)
|
||||
Reference in New Issue
Block a user