feat(operations): מסך "סוכנים פעילים" + ניהול ריצות (live-runs/log/cancel) (G12/X15, #119)
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 6s

פאנל ב-/operations שמראה אילו סוכני Paperclip עובדים כעת (רצים+בתור), הפלט החי
שלהם, ושליטה מבוקרת: עצירת ריצה, איפוס session. סוגר את הנקודה-העיוורת שבה drain
מונע-סוכן (למשל ריקון תור הלכות ע"י ה-CEO heartbeat) עוקף את בקרת /operations
שמכירה רק שירותי pm2, והפלט הגולמי נגיש רק ב-Paperclip UI.

מקור-נתונים: Paperclip heartbeat-runs API (אומת חי):
  GET  /api/companies/{cid}/live-runs        — רצים+בתור (agentName/status/issue/outputSilence)
  GET  /api/heartbeat-runs/{id}/log          — NDJSON של פלט הסוכן
  GET  /api/heartbeat-runs/{id}/events        — timeline
  POST /api/heartbeat-runs/{id}/cancel        — עצירה מבוקרת (לא kill — מכבד watchdog+checkpoint)
  POST /api/agents/{id}/runtime-state/reset-session

ארכיטקטורה (G12/INV-PORT1): כל המגע החדש עם Paperclip דרך השער בלבד —
web/paperclip_client.py (shell) → re-export ב-web/agent_platform_port.py →
web/app.py צורך מהשער. leak_guard.py עובר (seam שלם). אסור kill ישיר על
process_pid (עוקף את השער).

Backend:
- paperclip_client: list_live_runs / get_run_log / get_run_events / cancel_run / reset_agent_session
- agent_platform_port: re-export pc_list_live_runs / pc_get_run_log / pc_get_run_events / pc_cancel_run / pc_reset_agent_session
- app.py: GET /api/operations/agents (אגרגציה CMP+CMPA, עמיד לכשל-חברה),
  GET .../runs/{id}/log, GET .../runs/{id}/events, POST .../runs/{id}/cancel,
  POST .../agents/{id}/reset-session

Frontend: פאנל "סוכנים פעילים" ב-/operations (polling 4s) + dialog ללוג חי
(פרסור NDJSON→טקסט קריא) + כפתורי עצור/אפס. הוספת hooks ל-operations.ts.

בטיחות: cancel על דריינר הלכות בטוח — חילוץ checkpointed per-chunk + resumable
+ self-heal לשורות processing.

Invariants: מקיים G12/INV-PORT1 (שער-הפלטפורמה). נוגע X6 (UI↔API).
api:types יורץ אחרי deploy (openapi.json חי).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-11 13:26:30 +00:00
parent 130ddc3a7e
commit 2f094b8d84
5 changed files with 477 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
"use client";
import { useState } from "react";
import Link from "next/link";
import { AppShell } from "@/components/app-shell";
import { Card, CardContent } from "@/components/ui/card";
@@ -7,13 +8,26 @@ import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Switch } from "@/components/ui/switch";
import { Skeleton } from "@/components/ui/skeleton";
import { ScrollArea } from "@/components/ui/scroll-area";
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogDescription,
} from "@/components/ui/dialog";
import {
useOperations,
useServiceAction,
useDrainToggle,
useAgentRuns,
useRunLog,
useCancelRun,
useResetAgentSession,
type OpsService,
type OperationsSnapshot,
type PipelineStats,
type AgentRun,
} from "@/lib/api/operations";
function mb(bytes: number): string {
@@ -337,6 +351,188 @@ function PipelineCard({
);
}
// ── Live agents — who's working now + output + controls ────────────────────

/** How one liveness level is shown in the UI: a Hebrew caption and a text-colour class. */
type SilenceBadge = { label: string; tone: string };

/**
 * Lookup from the platform's own liveness signal (`silence_level`) to its
 * display badge. Levels that are not listed here simply have no entry.
 */
const SILENCE_HE: Record<string, SilenceBadge> = {
  ok: {
    label: "פעיל",
    tone: "text-emerald-600",
  },
  suspicion: {
    label: "שקט חשוד",
    tone: "text-gold-deep",
  },
  critical: {
    label: "תקוע?",
    tone: "text-destructive",
  },
};
/** True for any non-null parsed JSON container (object or array) whose properties can be read. */
function isJsonContainer(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Render one line of an agent's stream into display text.
 *
 * - `assistant` events: the concatenated `text` items of `message.content`
 *   (`null` when there is no text, e.g. a tool-only turn).
 * - `result` events with a string `result`: that string.
 * - `system` events with a `subtype`: a short `· [subtype]` marker.
 * - anything else, including non-JSON or malformed events: the raw line.
 *
 * @returns the text to append, or `null` when the event has nothing to show.
 */
function renderRunLogPart(part: string): string | null {
  let ev: unknown;
  try {
    ev = JSON.parse(part);
  } catch {
    return part; // not JSON — keep the raw text
  }
  if (!isJsonContainer(ev)) return part;

  if (ev.type === "assistant") {
    const content = isJsonContainer(ev.message) ? ev.message.content : undefined;
    if (content) {
      // Malformed content (not a list, or a list with null holes) falls back
      // to the raw line rather than being dropped.
      if (!Array.isArray(content) || content.some((item) => item == null)) return part;
      const texts: unknown[] = [];
      for (const item of content as unknown[]) {
        if (isJsonContainer(item) && item.type === "text" && item.text) texts.push(item.text);
      }
      return texts.join("") || null;
    }
  }
  if (ev.type === "result" && typeof ev.result === "string") return ev.result;
  if (ev.type === "system" && ev.subtype) return `· [${String(ev.subtype)}]`;
  return part;
}

/**
 * Best-effort: turn the captured NDJSON stream into readable lines (tail).
 *
 * Each non-blank line of `content` may be a JSON wrapper whose string `chunk`
 * field carries the real payload; otherwise the line itself is the payload.
 * The payload is split on newlines and every part is rendered via
 * {@link renderRunLogPart}.
 *
 * @param content  Raw log text; empty input yields `""`.
 * @param maxLines Maximum number of rendered entries to keep, counted from the
 *                 end. A single entry may itself span several text lines.
 *                 Values below 1 (or NaN) yield `""`.
 * @returns the kept entries joined with `"\n"`.
 */
function parseRunLog(content: string, maxLines = 400): string {
  if (!content) return "";
  // Guard the cap explicitly: `slice(-0)` would return the WHOLE array and a
  // negative cap would drop entries from the head instead of keeping a tail.
  const cap = Math.floor(maxLines);
  if (!(cap >= 1)) return "";

  const out: string[] = [];
  for (const raw of content.split("\n")) {
    const line = raw.trim();
    if (!line) continue;

    let chunk = line;
    try {
      const wrap: unknown = JSON.parse(line);
      if (isJsonContainer(wrap) && typeof wrap.chunk === "string") chunk = wrap.chunk;
    } catch {
      // not a wrapper line — keep raw
    }

    // The chunk is often a claude stream-json event; extract the human bits.
    for (const part of chunk.split("\n")) {
      const p = part.trim();
      if (!p) continue;
      const rendered = renderRunLogPart(p);
      if (rendered !== null) out.push(rendered);
    }
  }
  return out.slice(-cap).join("\n");
}
/**
 * Modal that shows the parsed output log of one agent run.
 *
 * The dialog is open whenever `run` is non-null; dismissing it calls
 * `onClose`. The log is loaded through `useRunLog` for the run's id and
 * rendered through `parseRunLog`.
 */
function RunLogDialog({ run, onClose }: { run: AgentRun | null; onClose: () => void }) {
  const runId = run?.run_id ?? null;
  const { data, isLoading, error } = useRunLog(runId);
  const text = data ? parseRunLog(data.content) : "";

  // Only the "closed" transition matters; opening is driven by `run`.
  const handleOpenChange = (open: boolean) => {
    if (!open) onClose();
  };

  // Loading → skeleton, failure → error line, otherwise the scrollable log.
  const renderBody = () => {
    if (isLoading) {
      return <Skeleton className="h-72 w-full" />;
    }
    if (error) {
      return <p className="text-sm text-destructive">שגיאה בטעינת הלוג: {String(error)}</p>;
    }
    return (
      <ScrollArea className="h-[60vh] rounded-md border border-rule-soft bg-rule-soft/20 p-3">
        <pre dir="ltr" className="text-[0.72rem] leading-relaxed whitespace-pre-wrap break-words text-navy text-start">
          {text || "אין פלט עדיין."}
        </pre>
      </ScrollArea>
    );
  };

  return (
    <Dialog open={!!run} onOpenChange={handleOpenChange}>
      <DialogContent className="max-w-3xl">
        <DialogHeader>
          <DialogTitle>פלט הסוכן {run?.agent_name}</DialogTitle>
          <DialogDescription>
            {run?.company_label} · ריצה <span dir="ltr" className="font-mono">{run?.run_id?.slice(0, 8)}</span> · מתעדכן חי
          </DialogDescription>
        </DialogHeader>
        {renderBody()}
      </DialogContent>
    </Dialog>
  );
}
/**
 * "Active agents" panel: lists the queued/running agent runs returned by
 * `useAgentRuns`, with per-run actions — open the output log, request a
 * cancel (running runs only), and reset the agent's session.
 *
 * Failure handling:
 * - If the runs request itself failed and there is no data to show, an error
 *   line is rendered instead of an endless loading skeleton.
 * - The per-company load errors reported in `data.errors` are shown whether
 *   or not any run was returned, so a failing company is never presented as
 *   "no active agent".
 */
function LiveAgentsPanel() {
  const { data, isLoading, error } = useAgentRuns();
  const cancel = useCancelRun();
  const reset = useResetAgentSession();
  const [logRun, setLogRun] = useState<AgentRun | null>(null);
  // While any cancel/reset is in flight, the action buttons of every row are disabled.
  const busy = cancel.isPending || reset.isPending;
  return (
    <Card className="bg-surface border-rule shadow-sm">
      <CardContent className="px-6 py-5">
        <div className="flex items-center justify-between gap-3 mb-1 flex-wrap">
          <h2 className="text-navy text-lg mb-0">סוכנים פעילים</h2>
          {data ? (
            <div className="flex items-center gap-2 text-[0.72rem]">
              <Badge variant="default" className="font-normal">רצים {data.running}</Badge>
              <Badge variant="secondary" className="font-normal">בתור {data.queued}</Badge>
            </div>
          ) : null}
        </div>
        <p className="text-ink-muted text-xs mb-4">
          מי מבין סוכני-הוועדה עובד כרגע ומה הפלט שלו כולל עבודה שלא קשורה לתיק (כמו
          ריקון תור הלכות ע״י ה-CEO). עצירה היא מבוקרת דרך הפלטפורמה (לא kill).
        </p>
        {!data && error ? (
          // The request failed and nothing is cached: say so instead of spinning forever.
          <p className="text-sm text-destructive">שגיאה בטעינת הסוכנים: {String(error)}</p>
        ) : isLoading || !data ? (
          <Skeleton className="h-24 w-full" />
        ) : (
          <div className="grid gap-2">
            {/* Partial failures are surfaced even when no run came back. */}
            {data.errors.length > 0 ? (
              <p className="text-[0.72rem] text-destructive">
                לא ניתן לטעון חלק מהחברות: {data.errors.join(" · ")}
              </p>
            ) : null}
            {data.runs.length === 0 ? (
              <p className="text-sm text-ink-muted">אין סוכן פעיל כרגע.</p>
            ) : (
              data.runs.map((r) => {
                const sil = SILENCE_HE[r.silence_level];
                // An unparseable timestamp yields NaN, which is falsy and hides the "started" label.
                const startMs = r.started_at ? Date.parse(r.started_at) : 0;
                return (
                  <div
                    key={r.run_id}
                    className="flex items-center justify-between gap-3 rounded-md border border-rule-soft bg-rule-soft/30 px-3 py-2"
                  >
                    <div className="min-w-0">
                      <div className="flex items-center gap-2 flex-wrap">
                        <Badge
                          variant={r.status === "running" ? "default" : "secondary"}
                          className="font-normal"
                        >
                          {r.status === "running" ? "רץ" : "בתור"}
                        </Badge>
                        <span className="text-[0.85rem] text-navy font-semibold">{r.agent_name}</span>
                        {sil ? <span className={`text-[0.68rem] ${sil.tone}`}> {sil.label}</span> : null}
                      </div>
                      <div className="text-[0.66rem] text-ink-muted flex items-center gap-2 flex-wrap mt-0.5">
                        <span>{r.company_label}</span>
                        {r.status === "running" && startMs ? <span>החל {ago(startMs)}</span> : null}
                        {r.invocation_source ? (
                          <span dir="ltr" className="font-mono">{r.invocation_source}</span>
                        ) : null}
                        {r.continuation_attempt > 0 ? <span>ניסיון #{r.continuation_attempt + 1}</span> : null}
                      </div>
                    </div>
                    <div className="flex items-center gap-1.5 shrink-0">
                      <Button size="xs" variant="outline" onClick={() => setLogRun(r)}>
                        פלט
                      </Button>
                      <Button
                        size="xs"
                        variant="ghost"
                        className="text-destructive"
                        disabled={busy || r.status !== "running"}
                        onClick={() => {
                          if (confirm(`לעצור את הריצה של "${r.agent_name}"? (עצירה מבוקרת)`)) {
                            cancel.mutate(r.run_id);
                          }
                        }}
                      >
                        עצור
                      </Button>
                      <Button
                        size="xs"
                        variant="ghost"
                        disabled={busy}
                        title="איפוס session — ה-wakeup הבא יתחיל נקי"
                        onClick={() => {
                          if (confirm(`לאפס את ה-session של "${r.agent_name}"?`)) {
                            reset.mutate(r.agent_id);
                          }
                        }}
                      >
                        אפס
                      </Button>
                    </div>
                  </div>
                );
              })
            )}
          </div>
        )}
      </CardContent>
      <RunLogDialog run={logRun} onClose={() => setLogRun(null)} />
    </Card>
  );
}
export default function OperationsPage() {
const { data, isLoading, error } = useOperations();
@@ -369,6 +565,8 @@ export default function OperationsPage() {
</div>
) : (
<>
<LiveAgentsPanel />
<ServicesPanel data={data} />
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">

View File

@@ -103,3 +103,88 @@ export function useDrainToggle() {
onError: (e) => toast.error(`העדכון נכשל: ${String(e)}`),
});
}
// ── Live agents — which agent is working now + its output + controls ───────
/**
 * One agent run as listed in `AgentRunsResponse.runs`.
 *
 * Field names are snake_case as delivered by `/api/operations/agents`.
 * NOTE(review): timestamp strings are presumably ISO-8601 — the panel feeds
 * `started_at` to `Date.parse`; confirm against the backend.
 */
export type AgentRun = {
// Identifier of the run; used in the log/cancel endpoint paths.
run_id: string;
// Identifier of the agent; used in the reset-session endpoint path.
agent_id: string;
agent_name: string;
company_id: string;
company_label: string;
status: string; // running | queued | ...
invocation_source: string;
trigger_detail: string;
issue_id: string | null;
adapter_type: string;
started_at: string | null;
created_at: string | null;
last_output_at: string | null;
// The panel shows "attempt #(n + 1)" when this is greater than 0.
continuation_attempt: number;
silence_level: string; // "" | ok | suspicion | critical
// NOTE(review): presumably milliseconds since the last output — confirm.
silence_age_ms: number;
};
/** Response body of `GET /api/operations/agents`, as typed by `useAgentRuns`. */
export type AgentRunsResponse = {
// The listed runs (queued and running).
runs: AgentRun[];
// Counters shown as the "running" / "queued" badges in the panel header.
running: number;
queued: number;
// Load-failure messages; the panel shows them as "some companies could not be loaded".
errors: string[];
};
/**
 * Response body of `GET /api/operations/agents/runs/{id}/log`, as typed by
 * `useRunLog`. Unlike `AgentRun`, the field names here are camelCase.
 */
export type RunLog = {
runId: string;
// NOTE(review): `store` / `logRef` presumably identify where the log is kept — confirm.
store: string;
logRef: string;
content: string; // NDJSON stream the adapter captured
};
/**
 * Query hook for the live list of queued and running heartbeat runs across
 * all companies. Re-fetches every 4 seconds; results count as fresh for 2.
 */
export function useAgentRuns() {
  const pollEveryMs = 4000; // live view of who's working now
  const freshForMs = 2000;
  return useQuery({
    queryKey: ["operations", "agents"],
    queryFn: ({ signal }) => {
      return apiRequest<AgentRunsResponse>("/api/operations/agents", { signal });
    },
    refetchInterval: pollEveryMs,
    staleTime: freshForMs,
  });
}
/**
 * Query hook for the full output log of a single run.
 *
 * Nothing is fetched while `runId` is null; once an id is supplied the log is
 * loaded and then re-fetched every 4 seconds as a live tail.
 */
export function useRunLog(runId: string | null) {
  const hasRun = !!runId;
  return useQuery({
    queryKey: ["operations", "agents", "log", runId],
    queryFn: ({ signal }) => {
      const url = `/api/operations/agents/runs/${runId}/log`;
      return apiRequest<RunLog>(url, { signal });
    },
    enabled: hasRun,
    refetchInterval: hasRun ? 4000 : false, // live tail while open
  });
}
/**
 * Mutation hook that asks the platform to cancel a run gracefully (this is a
 * cancel request, not a raw kill). On success it toasts a confirmation and
 * invalidates the agents queries; on failure it toasts the error.
 */
export function useCancelRun() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (runId: string) => {
      const url = `/api/operations/agents/runs/${runId}/cancel`;
      return apiRequest(url, { method: "POST" });
    },
    onSuccess: () => {
      toast.success("בקשת עצירה נשלחה");
      // Refresh the live list so the cancelled run's new state shows up.
      queryClient.invalidateQueries({ queryKey: ["operations", "agents"] });
    },
    onError: (err) => toast.error(`העצירה נכשלה: ${String(err)}`),
  });
}
/**
 * Mutation hook that resets an agent's session so that its next wakeup starts
 * clean. On success it toasts a confirmation and invalidates the agents
 * queries; on failure it toasts the error.
 */
export function useResetAgentSession() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: (agentId: string) => {
      const url = `/api/operations/agents/${agentId}/reset-session`;
      return apiRequest(url, { method: "POST" });
    },
    onSuccess: () => {
      toast.success("ה-session אופס");
      // Refresh the live list after the reset.
      queryClient.invalidateQueries({ queryKey: ["operations", "agents"] });
    },
    onError: (err) => toast.error(`האיפוס נכשל: ${String(err)}`),
  });
}