feat(ops): /operations — מוני-תור אחידים, "מה רץ עכשיו", וניהול-תהליכים

הדף הציג את התורים באופן לא-אחיד (by_status גולמי), בלי הבחנה בין "ממתין"
(בקלוג: status=pending) ל"בתור" (התור הפעיל: requested_at IS NOT NULL), בלי
הצגת הפריט שרץ כרגע, ובלי שום שליטה בתהליכים.

מה נוסף:
1. כרטיסי-תור אחידים — בתור / ממתין(בקלוג) / בעיבוד / הושלם / נכשל + "רץ עכשיו"
   (citation/case_number של הפריט בעיבוד) לכל drain (אחזור-פסיקה, מטא-דאטה,
   הלכות, יומונים). שערי-אנוש (אישור-הלכות, פסיקה-חסרה) נשארים מוני-סטטוס.
2. פאנל ניהול-תהליכים בסגנון "שירותי Windows":
   - דמון (court-fetch-service/xvfb/chat/reaper): הפעל-מחדש / עצור / הפעל.
   - cron drain: "הרץ עכשיו" (pm2 restart) + מתג הפעל/כבה תזמון.
3. כל תגי-הסטטוס מתורגמים לעברית.

מנגנון:
- הפעל/כבה תזמון = דגל ב-DB (טבלה drain_controls). pm2 cron_restart מחיה תהליך
  שנעצר ב-stop, לכן ה"כיבוי" האמין הוא דגל שכל drain בודק ב-startup (no-op מיידי
  כשכבוי). הקונטיינר כותב ל-DB וקורא ממנו ישירות.
- הרץ-עכשיו + restart/stop/start = proxy ל-pm2 דרך endpoint חדש בגשר-המארח
  (court_fetch_service /pm2/control), מאובטח Bearer + whitelist ל-legal-* בלבד.
- יומונים: drain_digests הועבר מ-crontab ל-pm2 (legal-digest-drain.config.cjs)
  כדי שיופיע ויהיה שליט כמו כל drain. drain_halacha_queue.py הובא לבקרת-גרסאות.

Invariants: מקיים G2 (הרחבת /operations + הגשר הקיים, לא מסלול מקביל) ו-G1
(drain_controls = מקור-אמת יחיד לכיבוי, נורמליזציה במקור ולא תיקון-בקריאה).
אין בליעת שגיאות שקטה (הגשר מחזיר {ok,error}; המוטציות מציגות toast).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 08:57:23 +00:00
parent 6647aa92e6
commit 638eef6803
11 changed files with 676 additions and 98 deletions

View File

@@ -22,7 +22,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp-server" / "
import zipfile
from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
from fastapi import BackgroundTasks, Body, FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from typing import Any, Literal
from pydantic import BaseModel
@@ -6030,6 +6030,49 @@ async def _ops_pm2_services() -> dict:
return {"services": [], "error": f"לא ניתן להגיע לשירות-המארח: {e}"}
async def _ops_pm2_control(name: str, action: str) -> dict:
    """Proxy a mutating pm2 action to the host bridge (Bearer-authenticated).

    Args:
        name: Process name, forwarded unchanged in the JSON body.
        action: Action verb, forwarded unchanged in the JSON body.

    Returns:
        The decoded JSON body of the bridge's HTTP 200 response.

    Raises:
        HTTPException: 502 when the bridge cannot be reached at all; otherwise
            the bridge's own non-200 status code, with its ``error`` field (or
            a truncated copy of the raw body) as the detail.
    """
    secret = os.environ.get("COURT_FETCH_SHARED_SECRET", "").strip()
    # With no secret configured, no Authorization header is sent at all.
    headers = {"Authorization": f"Bearer {secret}"} if secret else {}
    try:
        async with httpx.AsyncClient(timeout=35.0) as client:
            r = await client.post(
                f"{_COURT_FETCH_SERVICE_URL}/pm2/control",
                json={"name": name, "action": action},
                headers=headers,
            )
    except httpx.RequestError as e:
        # Connection/timeout failures used to escape as an opaque 500; report
        # them explicitly so the caller can show a meaningful error.
        raise HTTPException(502, f"לא ניתן להגיע לשירות-המארח: {e}") from e
    try:
        payload = r.json()
    except ValueError:
        # Non-JSON body (e.g. a proxy error page): keep a short excerpt.
        payload = {"error": r.text[:200]}
    if r.status_code != 200:
        # The body may be valid JSON without being an object (list, string...),
        # in which case it has no .get(); fall back to its text form.
        if isinstance(payload, dict):
            detail = payload.get("error", "pm2 control failed")
        else:
            detail = str(payload)[:200]
        raise HTTPException(r.status_code, detail)
    return payload
def _norm_pipeline(
by_status: dict, *,
pending: tuple[str, ...], processing: tuple[str, ...],
done: tuple[str, ...], failed: tuple[str, ...],
queued: int, running_now: list[str], extra: dict | None = None,
) -> dict:
"""Project a raw status-count map into the dashboard's uniform shape:
pending / processing / done / failed / queued + the live running items."""
def total(keys: tuple[str, ...]) -> int:
return sum(int(by_status.get(k, 0) or 0) for k in keys)
out = {
"pending": total(pending),
"processing": total(processing),
"done": total(done),
"failed": total(failed),
"queued": queued,
"running_now": running_now,
"by_status": by_status,
}
if extra:
out.update(extra)
return out
@app.get("/api/operations")
async def operations_snapshot():
"""Everything running in the background: services + pipelines/queues."""
@@ -6038,6 +6081,9 @@ async def operations_snapshot():
async def counts(sql: str) -> dict:
return {r[0]: r[1] for r in await conn.fetch(sql)}
async def col(sql: str) -> list[str]:
return [r[0] for r in await conn.fetch(sql) if r[0]]
court_fetch = await counts(
"SELECT status, count(*) FROM court_fetch_jobs GROUP BY 1"
)
@@ -6047,6 +6093,10 @@ async def operations_snapshot():
"updated_at FROM court_fetch_jobs ORDER BY updated_at DESC LIMIT 15"
)
]
court_running = await col(
"SELECT coalesce(nullif(citation_raw,''), case_number_norm) "
"FROM court_fetch_jobs WHERE status = 'running' ORDER BY updated_at LIMIT 5"
)
meta = await counts(
"SELECT coalesce(metadata_extraction_status,'unknown'), count(*) "
"FROM case_law GROUP BY 1"
@@ -6054,6 +6104,10 @@ async def operations_snapshot():
meta_queued = await conn.fetchval(
"SELECT count(*) FROM case_law WHERE metadata_extraction_requested_at IS NOT NULL"
)
meta_running = await col(
"SELECT case_number FROM case_law WHERE metadata_extraction_status = 'processing' "
"ORDER BY metadata_extraction_requested_at NULLS LAST LIMIT 5"
)
hal_ext = await counts(
"SELECT coalesce(halacha_extraction_status,'unknown'), count(*) "
"FROM case_law GROUP BY 1"
@@ -6061,12 +6115,24 @@ async def operations_snapshot():
hal_queued = await conn.fetchval(
"SELECT count(*) FROM case_law WHERE halacha_extraction_requested_at IS NOT NULL"
)
hal_running = await col(
"SELECT case_number FROM case_law WHERE halacha_extraction_status = 'processing' "
"ORDER BY halacha_extraction_requested_at NULLS LAST LIMIT 5"
)
review = await counts("SELECT review_status, count(*) FROM halachot GROUP BY 1")
missing = await counts("SELECT status, count(*) FROM missing_precedents GROUP BY 1")
digest_ext = await counts(
"SELECT coalesce(extraction_status,'unknown'), count(*) FROM digests GROUP BY 1"
)
digests_total = await conn.fetchval("SELECT count(*) FROM digests")
digests_linked = await conn.fetchval(
"SELECT count(*) FROM digests WHERE linked_case_law_id IS NOT NULL"
)
digest_running = await col(
"SELECT coalesce(nullif(underlying_citation,''), nullif(concept_tag,''), "
"'יומון '||yomon_number) FROM digests WHERE extraction_status = 'processing' "
"ORDER BY updated_at LIMIT 5"
)
ingested_recent = [
dict(r) for r in await conn.fetch(
"SELECT case_number, court, source_url, created_at FROM case_law "
@@ -6075,6 +6141,9 @@ async def operations_snapshot():
]
pm2 = await _ops_pm2_services()
controls = await db.get_drain_controls()
for svc in pm2["services"]:
svc["disabled"] = controls.get(svc.get("name", ""), False)
def _iso(rows: list[dict]) -> list[dict]:
for d in rows:
@@ -6087,17 +6156,72 @@ async def operations_snapshot():
"services": pm2["services"],
"services_error": pm2["error"],
"pipelines": {
"court_fetch": {"by_status": court_fetch, "recent": _iso(court_recent)},
"metadata_extraction": {"by_status": meta, "queued": meta_queued},
"halacha_extraction": {"by_status": hal_ext, "queued": hal_queued},
"court_fetch": {
**_norm_pipeline(
court_fetch,
pending=("pending",), processing=("running",), done=("done",),
failed=("failed", "manual"),
queued=int(court_fetch.get("pending", 0)) + int(court_fetch.get("failed", 0)),
running_now=court_running,
),
"recent": _iso(court_recent),
},
"metadata_extraction": _norm_pipeline(
meta,
pending=("pending",), processing=("processing",), done=("completed",),
failed=("failed",), queued=meta_queued, running_now=meta_running,
),
"halacha_extraction": _norm_pipeline(
hal_ext,
pending=("pending",), processing=("processing",), done=("completed",),
failed=("failed",), queued=hal_queued, running_now=hal_running,
),
"digests": _norm_pipeline(
digest_ext,
pending=("pending",), processing=("processing",), done=("completed",),
failed=("failed",),
queued=int(digest_ext.get("pending", 0)), running_now=digest_running,
extra={"total": digests_total, "linked": digests_linked},
),
# Human gates, not background drains — surfaced as status counts only.
"halacha_review": {"by_status": review},
"missing_precedents": {"by_status": missing},
"digests": {"total": digests_total, "linked": digests_linked},
"ingested_recent": _iso(ingested_recent),
},
}
# Verbs accepted by the service-control endpoint below.
_OPS_SERVICE_ACTIONS = {"restart", "stop", "start", "run-now"}


@app.post("/api/operations/services/{name}/{action}")
async def operations_service_action(name: str, action: str):
    """Apply a control verb to a background service via the host bridge.

    Accepts restart / stop / start, plus 'run-now', which is forwarded as a
    pm2 restart. Unknown verbs are rejected with 400 and any name outside the
    legal-* prefix with 403, before anything is forwarded.
    """
    if not (action in _OPS_SERVICE_ACTIONS):
        allowed = sorted(_OPS_SERVICE_ACTIONS)
        raise HTTPException(400, f"action חייב להיות אחד מ-{allowed}")
    is_legal_service = name.startswith("legal-")
    if not is_legal_service:
        raise HTTPException(403, "ניתן לשלוט רק בשירותי legal-*")
    # 'run-now' is the only alias; every other verb is forwarded unchanged.
    forwarded = {"run-now": "restart"}.get(action, action)
    result = await _ops_pm2_control(name, forwarded)
    return result
@app.post("/api/operations/drains/{name}/disabled")
async def operations_drain_toggle(name: str, body: dict = Body(...)):
    """Switch a cron drain on/off (the 'startup type' in the services panel).

    Written straight to drain_controls — no host roundtrip; the drain reads the
    flag at startup and no-ops when disabled (pm2 cron_restart can't be trusted
    to stay stopped).

    Args:
        name: Drain process name; must start with ``legal-``.
        body: JSON object carrying a boolean ``disabled`` field.

    Returns:
        ``{"ok": True, "name": ..., "disabled": ...}`` echoing the stored flag.

    Raises:
        HTTPException: 403 for a name outside ``legal-*``; 400 when
            ``disabled`` is missing or is not a JSON boolean.
    """
    if not name.startswith("legal-"):
        raise HTTPException(403, "ניתן לשלוט רק בשירותי legal-*")
    disabled = body.get("disabled")
    # Plain bool() coercion would read the string "false" as True and a
    # missing key as False, silently flipping the drain; demand a real boolean.
    if not isinstance(disabled, bool):
        raise HTTPException(400, "disabled חייב להיות ערך בוליאני (true/false)")
    await db.set_drain_disabled(name, disabled)
    return {"ok": True, "name": name, "disabled": disabled}
@app.get("/api/digests/{digest_id}")
async def digest_get(digest_id: str):
try: