Upload progress: Redis-backed store + flushed SSE + client fallback
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 3m24s

The previous in-memory _progress dict + polling SSE handler had a 30s silent
tail after completion. HTTP/2 framing in the proxy chain (Traefik) buffered
the small chunks until the stream closed, so the client saw no events in the
meantime; and when a transient blip caused EventSource to reconnect, the
server returned 404 and the UI got stuck on the "מתחיל…" placeholder
forever. Reproduced live: 445 bytes withheld for 31s.

Changes:
  • web/progress_store.py — ProgressStore wraps Redis with TTL (5m), atomic
    GETDEL, dict-like API. Best-effort: Redis errors are logged and swallowed
    so observability outages don't break uploads.
  • web/app.py — _progress is now Redis-backed; every set/get/active/pop is
    awaited. SSE handler emits a heartbeat each tick (forces HTTP/2 flush),
    drops the 30s post-completion sleep, and returns a terminal
    {"status":"unknown"} payload instead of 404 when the task is gone — so
    EventSource closes cleanly instead of reconnect-looping. New _SSE_HEADERS
    set X-Accel-Buffering: no.
  • web-ui useProgress(taskId, caseNumber) — 10s fallback that, if no SSE
    message has arrived, invalidates the case detail and synthesizes a
    "completed" event; also treats "unknown" as terminal and triggers a
    refetch of the case detail (the source of truth).
  • upload-sheet wires caseNumber through and renders "unknown" as completed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 12:53:23 +00:00
parent 03e7d88aee
commit 9bdfb05350
4 changed files with 272 additions and 74 deletions

View File

@@ -43,6 +43,7 @@ function statusLabel(event: ProgressEvent | null): string {
if (event.status === "processing")
return event.step ? `בעיבוד · ${event.step}` : "בעיבוד";
if (event.status === "completed") return "הושלם";
if (event.status === "unknown") return "הושלם";
if (event.status === "failed") return event.error ?? "נכשל";
return event.status;
}
@@ -52,15 +53,16 @@ function progressPercent(event: ProgressEvent | null): number {
if (event.status === "queued") return 10;
if (event.status === "processing") return 55;
if (event.status === "completed") return 100;
if (event.status === "unknown") return 100;
if (event.status === "failed") return 100;
return 25;
}
function UploadRowView({ row }: { row: UploadRow }) {
const progress = useProgress(row.taskId);
function UploadRowView({ row, caseNumber }: { row: UploadRow; caseNumber: string }) {
const progress = useProgress(row.taskId, caseNumber);
const pct = row.error ? 100 : progressPercent(progress);
const failed = row.error || progress?.status === "failed";
const done = progress?.status === "completed";
const done = progress?.status === "completed" || progress?.status === "unknown";
return (
<li className="rounded-lg border border-rule bg-parchment/40 px-4 py-3 space-y-2">
@@ -197,7 +199,7 @@ export function UploadSheet({ caseNumber }: { caseNumber: string }) {
{rows.length > 0 && (
<ul className="space-y-2">
{rows.map((row) => (
<UploadRowView key={row.id} row={row} />
<UploadRowView key={row.id} row={row} caseNumber={caseNumber} />
))}
</ul>
)}

View File

@@ -22,7 +22,10 @@ export type UploadTaggedResponse = {
};
export type ProgressEvent = {
status: "queued" | "processing" | "completed" | "failed" | string;
/* "unknown" is sent by the backend when the task TTL expired or the
* caller subscribed before any state was published. Treat it as a
* terminal hint to refetch case state from the source of truth. */
status: "queued" | "processing" | "completed" | "failed" | "unknown" | string;
filename?: string;
step?: string;
error?: string;
@@ -191,28 +194,54 @@ export function useExtractAppraiserFacts(caseNumber: string) {
}
export function useProgress(taskId: string | null) {
export function useProgress(taskId: string | null, caseNumber?: string) {
const [event, setEvent] = useState<ProgressEvent | null>(null);
const qc = useQueryClient();
useEffect(() => {
if (!taskId) return;
setEvent(null);
/* Self-heal fallback: if no SSE message arrives within 10s — usually
* because the proxy chain held the chunks or the EventSource is
* silently retrying — synthesize a refresh by invalidating the case
* detail. The actual document state is in the case detail anyway, so
* the UI heals from the source of truth without depending on SSE. */
let firstMessageReceived = false;
const fallback = window.setTimeout(() => {
if (firstMessageReceived) return;
if (caseNumber) qc.invalidateQueries({ queryKey: casesKeys.detail(caseNumber) });
setEvent({ status: "completed" });
}, 10_000);
const close = openSSE<ProgressEvent>(
`/api/progress/${encodeURIComponent(taskId)}`,
{
onMessage: (data) => {
firstMessageReceived = true;
setEvent(data);
if (data.status === "completed" || data.status === "failed") {
/* Close from within the callback — the backend ends the stream
* naturally, but closing eagerly avoids the auto-reconnect loop
* EventSource does after EOF. */
if (
data.status === "completed" ||
data.status === "failed" ||
data.status === "unknown"
) {
/* Close from within the callback so EventSource does not
* auto-reconnect after the server's EOF. For "unknown" we
* also nudge a case-detail refetch — the task state is gone
* but the document row will tell us the truth. */
if (data.status === "unknown" && caseNumber) {
qc.invalidateQueries({ queryKey: casesKeys.detail(caseNumber) });
}
close();
}
},
},
);
return () => close();
}, [taskId]);
return () => {
window.clearTimeout(fallback);
close();
};
}, [taskId, caseNumber, qc]);
return event;
}