mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-16 23:02:04 +00:00
fix(foundry): add Bun idleTimeout safety net and subscription retry with backoff
Bun.serve() defaults to a 10s idle timeout that can kill long-running requests. Actor RPCs go through the gateway tunnel with a 1s SSE ping, so this likely never fires, but set idleTimeout to 255 as a safety net. Subscription topics (app, org, session, task) previously had no retry mechanism. If the initial connection or a mid-session error occurred, the subscription stayed in error state permanently. Add exponential backoff retry (1s base, 30s max) that cleans up the old connection before each attempt and stops when disposed or no listeners remain. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
bea3b58199
commit
7b23e519c2
2 changed files with 156 additions and 0 deletions
|
|
@ -4,6 +4,11 @@ import { topicDefinitions, type TopicData, type TopicDefinition, type TopicKey,
|
|||
|
||||
const GRACE_PERIOD_MS = 30_000;
|
||||
|
||||
/** Initial retry delay in ms. */
|
||||
const RETRY_BASE_MS = 1_000;
|
||||
/** Maximum retry delay in ms. */
|
||||
const RETRY_MAX_MS = 30_000;
|
||||
|
||||
/**
|
||||
* Remote implementation of SubscriptionManager.
|
||||
* Each cache entry owns one actor connection plus one materialized snapshot.
|
||||
|
|
@ -80,9 +85,12 @@ class TopicEntry<TData, TParams, TEvent> {
|
|||
private unsubscribeEvent: (() => void) | null = null;
|
||||
private unsubscribeError: (() => void) | null = null;
|
||||
private teardownTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
private retryTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
private retryAttempt = 0;
|
||||
private startPromise: Promise<void> | null = null;
|
||||
private eventPromise: Promise<void> = Promise.resolve();
|
||||
private started = false;
|
||||
private disposed = false;
|
||||
|
||||
constructor(
|
||||
private readonly topicKey: TopicKey,
|
||||
|
|
@ -136,7 +144,9 @@ class TopicEntry<TData, TParams, TEvent> {
|
|||
}
|
||||
|
||||
dispose(): void {
|
||||
this.disposed = true;
|
||||
this.cancelTeardown();
|
||||
this.cancelRetry();
|
||||
this.unsubscribeEvent?.();
|
||||
this.unsubscribeError?.();
|
||||
if (this.conn) {
|
||||
|
|
@ -148,6 +158,55 @@ class TopicEntry<TData, TParams, TEvent> {
|
|||
this.error = null;
|
||||
this.lastRefreshAt = null;
|
||||
this.started = false;
|
||||
this.retryAttempt = 0;
|
||||
}
|
||||
|
||||
private cancelRetry(): void {
|
||||
if (this.retryTimer) {
|
||||
clearTimeout(this.retryTimer);
|
||||
this.retryTimer = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a retry with exponential backoff. Cleans up any existing
|
||||
* connection state before reconnecting.
|
||||
*/
|
||||
private scheduleRetry(): void {
|
||||
if (this.disposed || this.listenerCount === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const delay = Math.min(RETRY_BASE_MS * 2 ** this.retryAttempt, RETRY_MAX_MS);
|
||||
this.retryAttempt++;
|
||||
|
||||
this.retryTimer = setTimeout(() => {
|
||||
this.retryTimer = null;
|
||||
if (this.disposed || this.listenerCount === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Tear down the old connection before retrying
|
||||
this.cleanupConnection();
|
||||
this.started = false;
|
||||
this.startPromise = this.start().finally(() => {
|
||||
this.startPromise = null;
|
||||
});
|
||||
}, delay);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans up connection resources without resetting data/status/retry state.
|
||||
*/
|
||||
private cleanupConnection(): void {
|
||||
this.unsubscribeEvent?.();
|
||||
this.unsubscribeError?.();
|
||||
this.unsubscribeEvent = null;
|
||||
this.unsubscribeError = null;
|
||||
if (this.conn) {
|
||||
void this.conn.dispose();
|
||||
}
|
||||
this.conn = null;
|
||||
}
|
||||
|
||||
private async start(): Promise<void> {
|
||||
|
|
@ -164,17 +223,20 @@ class TopicEntry<TData, TParams, TEvent> {
|
|||
this.status = "error";
|
||||
this.error = error instanceof Error ? error : new Error(String(error));
|
||||
this.notify();
|
||||
this.scheduleRetry();
|
||||
});
|
||||
this.data = await this.definition.fetchInitial(this.backend, this.params);
|
||||
this.status = "connected";
|
||||
this.lastRefreshAt = Date.now();
|
||||
this.started = true;
|
||||
this.retryAttempt = 0;
|
||||
this.notify();
|
||||
} catch (error) {
|
||||
this.status = "error";
|
||||
this.error = error instanceof Error ? error : new Error(String(error));
|
||||
this.started = false;
|
||||
this.notify();
|
||||
this.scheduleRetry();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue