refactor(court-fetch): usage_status קורא ממקור-האמת המשותף usage_limits
מסיר את ההעתק השלישי של קריאת ה-OAuth usage endpoint. usage_status שומר על ה-cache שלו (60 שניות) ועל ה-serve-stale, אבל את ה-fetch הגולמי הוא קורא כעת מ-legal_mcp.services.usage_limits.subscription_usage() (אותו קורא שהדריינר והסופרוויזר מגודרים עליו) דרך run_in_executor — כי הקורא סינכרוני (urllib).

- הסרת 3 הקבועים המשוכפלים (_CLAUDE_CRED_PATH/_OAUTH_USAGE_URL/_USAGE_UA).
- התנהגות זהה: HTTP 200 עם נתוני-ניצול, serve-stale ב-None, 502 אם אין cache.

אומת: import תחת venv תקין; usage_status(None) → HTTP 200, five_hour=50% / seven_day=45% דרך הקורא המשותף. py_compile עובר.

הערה: שירות host-side (legal-court-fetch-service, pm2) — דורש pm2 restart אחרי סנכרון לעץ הראשי.

Invariants: G1/G2 — מקור-אמת אחד ויחיד לקריאת-המכסה (כל שלושת הקוראים).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -43,6 +43,7 @@ if _pkg_root not in sys.path:
|
|||||||
sys.path.insert(0, _pkg_root)
|
sys.path.insert(0, _pkg_root)
|
||||||
|
|
||||||
from legal_mcp.court_fetch_service import camofox_client # noqa: E402
|
from legal_mcp.court_fetch_service import camofox_client # noqa: E402
|
||||||
|
from legal_mcp.services import usage_limits # noqa: E402
|
||||||
|
|
||||||
logger = logging.getLogger("legal_court_fetch_service")
|
logger = logging.getLogger("legal_court_fetch_service")
|
||||||
|
|
||||||
@@ -95,14 +96,6 @@ async def _pm2_run(*args: str, timeout: float = 10) -> tuple[int, bytes, bytes]:
|
|||||||
return proc.returncode or 0, out, err
|
return proc.returncode or 0, out, err
|
||||||
|
|
||||||
|
|
||||||
# claude.ai subscription usage — the 5-hour / weekly utilization % the Claude
|
|
||||||
# Code status bar shows, from the (undocumented) OAuth usage endpoint. Host-only:
|
|
||||||
# the OAuth token lives in the CLI credentials file on the host, never in the
|
|
||||||
# container. Read-only (no auth), like /pm2. The claude-code User-Agent is
|
|
||||||
# REQUIRED — without it the request lands in an aggressively rate-limited bucket.
|
|
||||||
_CLAUDE_CRED_PATH = "/home/chaim/.claude/.credentials.json"
|
|
||||||
_OAUTH_USAGE_URL = "https://api.anthropic.com/api/oauth/usage"
|
|
||||||
_USAGE_UA = "claude-code/2.1.177"
|
|
||||||
# /operations polls every 5s; the usage endpoint 429s if hit that often (it's
|
# /operations polls every 5s; the usage endpoint 429s if hit that often (it's
|
||||||
# meant for a status bar, not a poll loop). Cache the last good payload and only
|
# meant for a status bar, not a poll loop). Cache the last good payload and only
|
||||||
# re-fetch when older than this — Anthropic sees ~1 req/min regardless of how
|
# re-fetch when older than this — Anthropic sees ~1 req/min regardless of how
|
||||||
@@ -115,35 +108,26 @@ async def usage_status(request: web.Request) -> web.Response:
|
|||||||
"""Proxy the claude.ai subscription usage % (host-only — needs the local
|
"""Proxy the claude.ai subscription usage % (host-only — needs the local
|
||||||
OAuth token), cached for _USAGE_TTL_SEC. On a fetch failure (e.g. the
|
OAuth token), cached for _USAGE_TTL_SEC. On a fetch failure (e.g. the
|
||||||
endpoint's own 429) serve the last good payload if we have one, so a
|
endpoint's own 429) serve the last good payload if we have one, so a
|
||||||
transient limit doesn't blank the dashboard."""
|
transient limit doesn't blank the dashboard.
|
||||||
|
|
||||||
|
The raw OAuth read is the SHARED single source of truth
|
||||||
|
(legal_mcp.services.usage_limits.subscription_usage) — the SAME reader the
|
||||||
|
halacha drain + supervisor gate on (G1/G2; no triplicated endpoint/creds/UA
|
||||||
|
constants). It's synchronous urllib, so run it in a thread to keep the aiohttp
|
||||||
|
event loop responsive."""
|
||||||
now = time.monotonic()
|
now = time.monotonic()
|
||||||
if _usage_cache["data"] is not None and (now - _usage_cache["ts"]) < _USAGE_TTL_SEC:
|
if _usage_cache["data"] is not None and (now - _usage_cache["ts"]) < _USAGE_TTL_SEC:
|
||||||
return web.json_response(_usage_cache["data"])
|
return web.json_response(_usage_cache["data"])
|
||||||
|
|
||||||
try:
|
import asyncio as _asyncio
|
||||||
with open(_CLAUDE_CRED_PATH) as f:
|
# subscription_usage returns None on ANY failure (creds missing / endpoint
|
||||||
token = json.load(f)["claudeAiOauth"]["accessToken"]
|
# 429 / network) — it never throws; serve stale if we have it.
|
||||||
except Exception as e:
|
data = await _asyncio.get_event_loop().run_in_executor(
|
||||||
|
None, usage_limits.subscription_usage)
|
||||||
|
if data is None:
|
||||||
if _usage_cache["data"] is not None:
|
if _usage_cache["data"] is not None:
|
||||||
return web.json_response(_usage_cache["data"])
|
return web.json_response(_usage_cache["data"])
|
||||||
return web.json_response({"error": f"no claude credentials: {e}"}, status=502)
|
return web.json_response({"error": "usage unavailable"}, status=502)
|
||||||
|
|
||||||
headers = {
|
|
||||||
"Authorization": f"Bearer {token}",
|
|
||||||
"User-Agent": _USAGE_UA,
|
|
||||||
"anthropic-beta": "oauth-2025-04-20",
|
|
||||||
}
|
|
||||||
try:
|
|
||||||
timeout = aiohttp.ClientTimeout(total=15)
|
|
||||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
|
||||||
async with session.get(_OAUTH_USAGE_URL, headers=headers) as r:
|
|
||||||
if r.status != 200:
|
|
||||||
raise RuntimeError(f"usage endpoint {r.status}")
|
|
||||||
data = await r.json()
|
|
||||||
except Exception as e: # never throw — serve stale if we have it
|
|
||||||
if _usage_cache["data"] is not None:
|
|
||||||
return web.json_response(_usage_cache["data"])
|
|
||||||
return web.json_response({"error": f"usage fetch failed: {e}"}, status=502)
|
|
||||||
|
|
||||||
_usage_cache["ts"] = now
|
_usage_cache["ts"] = now
|
||||||
_usage_cache["data"] = data
|
_usage_cache["data"] = data
|
||||||
|
|||||||
Reference in New Issue
Block a user