sandbox-agent/foundry/packages/backend/src/actors/sandbox-instance/index.ts
Nathan Flurry d8b8b49f37
Fix Foundry UI bugs: org names, sessions, and repo selection (#250)
* Fix Foundry auth: migrate to Better Auth adapter, fix access token retrieval

- Remove @ts-nocheck from better-auth.ts, auth-user/index.ts, app-shell.ts
  and fix all type errors
- Fix getAccessTokenForSession: read GitHub token directly from account
  record instead of calling Better Auth's internal /get-access-token
  endpoint which returns 403 on server-side calls
- Re-implement workspaceAuth helper functions (workspaceAuthColumn,
  normalizeAuthValue, workspaceAuthClause, workspaceAuthWhere) that were
  accidentally deleted
- Remove all retry logic (withRetries, isRetryableAppActorError)
- Implement CORS origin allowlist from configured environment
- Document cachedAppWorkspace singleton pattern
- Add inline org sync fallback in buildAppSnapshot for post-OAuth flow
- Add no-retry rule to CLAUDE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Add Foundry dev panel from fix-git-data branch

Port the dev panel component that was left out when PR #243 was replaced
by PR #247. Adapted to remove runtime/mock-debug references that don't
exist on the current branch.

- Toggle with Shift+D, persists visibility to localStorage
- Shows context, session, GitHub sync status sections
- Dev-only (import.meta.env.DEV)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Add full Docker image defaults, fix actor deadlocks, and improve dev experience

- Add Dockerfile.full and --all flag to install-agent CLI for pre-built images
- Centralize Docker image constant (FULL_IMAGE) pinned to 0.3.1-full
- Remove examples/shared/Dockerfile{,.dev} and daytona snapshot example
- Expand Docker docs with full runnable Dockerfile
- Fix self-deadlock in createWorkbenchSession (fire-and-forget provisioning)
- Audit and convert 12 task actions from wait:true to wait:false
- Add bun --hot for dev backend hot reload
- Remove --force from pnpm install in dev Dockerfile for faster startup
- Add env_file support to compose.dev.yaml for automatic credential loading
- Add mock frontend compose config and dev panel
- Update CLAUDE.md with wait:true policy and dev environment setup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* WIP: async action fixes and interest manager

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix Foundry UI bugs: org names, hanging sessions, and wrong repo creation

- Fix org display name using GitHub description instead of name field
- Fix createWorkbenchSession hanging when sandbox is provisioning
- Fix auto-session creation retry storm on errors
- Fix task creation using wrong repo due to React state race conditions
- Remove Bun hot-reload from backend Dockerfile (causes port drift)
- Add GitHub sync/install status to dev panel

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 20:48:22 -07:00

640 lines
20 KiB
TypeScript

import { setTimeout as delay } from "node:timers/promises";
import { eq } from "drizzle-orm";
import { actor, queue } from "rivetkit";
import { Loop, workflow } from "rivetkit/workflow";
import type { ProviderId } from "@sandbox-agent/foundry-shared";
import type {
ProcessCreateRequest,
ProcessInfo,
ProcessLogFollowQuery,
ProcessLogsResponse,
ProcessSignalQuery,
SessionEvent,
SessionRecord,
} from "sandbox-agent";
import { sandboxInstanceDb } from "./db/db.js";
import { sandboxInstance as sandboxInstanceTable } from "./db/schema.js";
import { SandboxInstancePersistDriver } from "./persist.js";
import { getActorRuntimeContext } from "../context.js";
import { selfSandboxInstance } from "../handles.js";
import { logActorWarning, resolveErrorMessage } from "../logging.js";
import { expectQueueResponse } from "../../services/queue.js";
/**
 * Input supplied when a sandbox-instance actor is created.
 * The actor's `createState` copies all three fields verbatim into actor state.
 */
export interface SandboxInstanceInput {
workspaceId: string;
providerId: ProviderId;
sandboxId: string;
}
/**
 * Connection details for the sandbox-agent of this sandbox.
 * `endpoint` is always present; `token` is omitted when no token is available.
 */
interface SandboxAgentConnection {
endpoint: string;
token?: string;
}
// Fixed primary key: every read/write of the sandbox-instance table in this file targets this one row.
const SANDBOX_ROW_ID = 1;
// Upper bound on createSession attempts (first try included) in createSessionMutation.
const CREATE_SESSION_MAX_ATTEMPTS = 3;
// Base delay between createSession retries; the wait is this value multiplied by the attempt number.
const CREATE_SESSION_RETRY_BASE_MS = 1_000;
// Timeout passed to the "sandbox-instance-create-session" workflow step (10 minutes).
const CREATE_SESSION_STEP_TIMEOUT_MS = 10 * 60_000;
/**
 * Maps a raw session event payload onto a coarse session status.
 *
 * Checks, in priority order:
 *  1. a truthy `error` field                         -> "error"
 *  2. an object `result` with non-empty `stopReason` -> "idle"
 *  3. a string `method` mentioning error/failed      -> "error"
 *  4. a string `method` mentioning ended/complete/stopped -> "idle"
 *
 * @param payload Arbitrary event payload; anything that is not an object yields `null`.
 * @returns The derived status, or `null` when the payload carries no status signal.
 *          ("running" is part of the return type but is never produced here.)
 */
function normalizeStatusFromEventPayload(payload: unknown): "running" | "idle" | "error" | null {
  if (!payload || typeof payload !== "object") {
    return null;
  }

  const envelope = payload as { error?: unknown; method?: unknown; result?: unknown };

  if (envelope.error) {
    return "error";
  }

  const result = envelope.result;
  if (result && typeof result === "object") {
    const reason = (result as { stopReason?: unknown }).stopReason;
    if (typeof reason === "string" && reason !== "") {
      return "idle";
    }
  }

  const method = envelope.method;
  if (typeof method !== "string") {
    return null;
  }

  // Method names are matched case-insensitively by substring.
  const name = method.toLowerCase();
  const mentionsAny = (markers: readonly string[]): boolean => markers.some((marker) => name.includes(marker));

  if (mentionsAny(["error", "failed"])) {
    return "error";
  }
  return mentionsAny(["ended", "complete", "stopped"]) ? "idle" : null;
}
/**
 * Serializes a value to JSON, rendering any `bigint` encountered as its
 * decimal string (plain `JSON.stringify` throws on bigint values).
 *
 * @param value Value to serialize.
 * @returns The JSON text.
 */
function stringifyJson(value: unknown): string {
  const bigintAsString = (_key: string, item: unknown): unknown => (typeof item === "bigint" ? item.toString() : item);
  return JSON.stringify(value, bigintAsString);
}
/**
 * Parses a persisted metadata JSON string into a plain key/value record.
 *
 * Never throws: malformed JSON and any parsed value that is not a plain
 * object (null, primitives, and arrays) all yield an empty record. Arrays are
 * rejected explicitly because `typeof [] === "object"` would otherwise let
 * them through under a `Record<string, unknown>` type.
 *
 * @param metadataJson Raw JSON text as stored in the metadata column.
 * @returns The parsed object, or `{}` when the input is not a JSON object.
 */
function parseMetadata(metadataJson: string): Record<string, unknown> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(metadataJson);
  } catch {
    return {};
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return {};
  }
  return parsed as Record<string, unknown>;
}
/**
 * Reads the sandbox-agent connection stored in the persisted metadata row.
 *
 * Best-effort: any database error, a missing row, or metadata without a
 * non-blank `agentEndpoint` string all resolve to `null` so callers can fall
 * back to asking the provider.
 *
 * @param c Actor context exposing `db`.
 * @returns The persisted connection (token included only when non-blank), or `null`.
 */
async function loadPersistedAgentConfig(c: any): Promise<SandboxAgentConnection | null> {
  const trimmedString = (value: unknown): string => (typeof value === "string" ? value.trim() : "");

  try {
    const row = await c.db
      .select({ metadataJson: sandboxInstanceTable.metadataJson })
      .from(sandboxInstanceTable)
      .where(eq(sandboxInstanceTable.id, SANDBOX_ROW_ID))
      .get();

    if (!row?.metadataJson) {
      return null;
    }

    const metadata = parseMetadata(row.metadataJson);
    const endpoint = trimmedString(metadata.agentEndpoint);
    if (!endpoint) {
      return null;
    }

    const token = trimmedString(metadata.agentToken);
    return token ? { endpoint, token } : { endpoint };
  } catch {
    // Deliberately swallowed: persisted config is only an optimisation.
    return null;
  }
}
/**
 * Asks Daytona for a fresh preview endpoint for this actor's sandbox.
 *
 * If the sandbox does not report a "started" or "running" state it is started
 * first. The preview endpoint is requested for port 2468 (presumably the
 * sandbox-agent port — confirm against the provider setup).
 *
 * @param c Actor context; `c.state.sandboxId` identifies the sandbox.
 * @returns The preview URL as `endpoint`, with `token` only when Daytona returns one.
 */
async function loadFreshDaytonaAgentConfig(c: any): Promise<SandboxAgentConnection> {
  const runtime = getActorRuntimeContext();
  const client = runtime.driver.daytona.createClient({
    apiUrl: runtime.config.providers.daytona.endpoint,
    apiKey: runtime.config.providers.daytona.apiKey,
  });

  const sandbox = await client.getSandbox(c.state.sandboxId);
  const sandboxState = String(sandbox.state ?? "unknown").toLowerCase();
  const alreadyUp = sandboxState === "started" || sandboxState === "running";
  if (!alreadyUp) {
    // Second argument is presumably a start timeout in seconds — TODO confirm.
    await client.startSandbox(c.state.sandboxId, 60);
  }

  const preview = await client.getPreviewEndpoint(c.state.sandboxId, 2468);
  if (preview.token) {
    return { endpoint: preview.url, token: preview.token };
  }
  return { endpoint: preview.url };
}
/**
 * Resolves the sandbox-agent connection by asking the sandbox's provider
 * (looked up by `c.state.providerId`) to ensure the agent exists.
 *
 * @param c Actor context carrying `workspaceId`, `providerId` and `sandboxId` in state.
 * @returns Whatever connection the provider's `ensureSandboxAgent` yields.
 */
async function loadFreshProviderAgentConfig(c: any): Promise<SandboxAgentConnection> {
  const runtime = getActorRuntimeContext();
  const provider = runtime.providers.get(c.state.providerId);
  const connection = await provider.ensureSandboxAgent({
    workspaceId: c.state.workspaceId,
    sandboxId: c.state.sandboxId,
  });
  return connection;
}
/**
 * Chooses the sandbox-agent connection to use, per provider:
 *
 * - "local":   always refreshed from the provider, never from persisted metadata.
 * - "daytona": persisted connection if present, otherwise a fresh Daytona preview endpoint.
 * - others:    persisted connection if present, otherwise refreshed from the provider.
 *
 * The persisted row is read up front regardless of provider.
 *
 * @param c Actor context.
 * @returns The connection to hand to the sandbox-agent client.
 */
async function loadAgentConfig(c: any): Promise<SandboxAgentConnection> {
  const persisted = await loadPersistedAgentConfig(c);

  switch (c.state.providerId) {
    case "daytona":
      // Keep one stable signed preview endpoint per sandbox-instance actor.
      // Rotating preview URLs on every call fragments SDK client state (sessions/events)
      // because client caching keys by endpoint.
      return persisted ?? (await loadFreshDaytonaAgentConfig(c));

    case "local":
      // Local sandboxes are tied to the current backend process, so the sandbox-agent
      // token can rotate on restart. Always refresh from the provider instead of
      // trusting persisted metadata.
      return await loadFreshProviderAgentConfig(c);

    default:
      return persisted ?? (await loadFreshProviderAgentConfig(c));
  }
}
/**
 * Derives a session's status purely from persisted data.
 *
 * - Unknown session           -> "error"
 * - Session with `destroyedAt` -> "idle"
 * - Otherwise the listed events (up to 25) are scanned from last to first and
 *   the first payload that maps to a status wins; with no match -> "idle".
 *
 * NOTE(review): whether `listEvents({ limit: 25 })` returns the oldest or the
 * newest 25 events is not visible here — confirm against the persist driver.
 *
 * @param persist   Persistence driver for this sandbox instance.
 * @param sessionId Session to inspect.
 * @returns The session id together with its derived status.
 */
async function derivePersistedSessionStatus(
  persist: SandboxInstancePersistDriver,
  sessionId: string,
): Promise<{ id: string; status: "running" | "idle" | "error" }> {
  const report = (status: "running" | "idle" | "error") => ({ id: sessionId, status });

  const session = await persist.getSession(sessionId);
  if (!session) {
    return report("error");
  }
  if (session.destroyedAt) {
    return report("idle");
  }

  const page = await persist.listEvents({ sessionId, limit: 25 });

  let position = page.items.length;
  while (position > 0) {
    position -= 1;
    const event = page.items[position];
    if (!event) {
      continue;
    }
    const derived = normalizeStatusFromEventPayload(event.payload);
    if (derived) {
      return report(derived);
    }
  }

  return report("idle");
}
/**
 * Decides whether a createSession failure message is worth retrying.
 *
 * Matching is case-insensitive and substring-based. Timeout-style messages
 * veto a retry even when a transient marker is also present.
 *
 * @param detail Error message text.
 * @returns `true` only for 502/503/bad-gateway/connection-reset/refused style failures.
 */
function isTransientSessionCreateError(detail: string): boolean {
  const text = detail.toLowerCase();
  const mentions = (marker: string): boolean => text.includes(marker);

  // ACP timeout errors are expensive and usually deterministic for the same
  // request; immediate retries spawn additional sessions/processes and make
  // recovery harder.
  const timeoutMarkers = ["timed out", "timeout", "504", "gateway timeout"];
  if (timeoutMarkers.some(mentions)) {
    return false;
  }

  const transientMarkers = ["502", "503", "bad gateway", "econnreset", "econnrefused"];
  return transientMarkers.some(mentions);
}
/**
 * Payload of the "sandboxInstance.command.ensure" queue message.
 * `metadata` is merged with `agentEndpoint`/`agentToken` (null when absent) and
 * stored as JSON; `status` is stored as the row status.
 */
interface EnsureSandboxCommand {
metadata: Record<string, unknown>;
status: string;
agentEndpoint?: string;
agentToken?: string;
}
/**
 * Payload of the "sandboxInstance.command.updateHealth" queue message.
 * Both fields are persisted together as `${status}:${message}` in the row's status column.
 */
interface HealthSandboxCommand {
status: string;
message: string;
}
/**
 * Payload of the "sandboxInstance.command.createSession" queue message.
 * All fields are forwarded unchanged to the sandbox-agent client's `createSession`.
 */
interface CreateSessionCommand {
prompt: string;
cwd?: string;
agent?: "claude" | "codex" | "opencode";
}
/**
 * Outcome of a createSession command.
 * On success `id` and `status` come from the created session; on failure
 * `id` is null, `status` is "error" and `error` carries the failure description.
 */
interface CreateSessionResult {
id: string | null;
status: "running" | "idle" | "error";
error?: string;
}
/** Optional paging arguments for the `listSessions` action; passed through to the session listing call. */
interface ListSessionsCommand {
cursor?: string;
limit?: number;
}
/** Arguments for the `listSessionEvents` action: the session to read plus optional paging. */
interface ListSessionEventsCommand {
sessionId: string;
cursor?: string;
limit?: number;
}
/**
 * Payload of the "sandboxInstance.command.sendPrompt" queue message.
 * All fields are forwarded unchanged to the sandbox-agent client's `sendPrompt`.
 */
interface SendPromptCommand {
sessionId: string;
prompt: string;
// Semantics are defined by the sandbox-agent client; not visible in this file.
notification?: boolean;
}
/** Argument for the `sessionStatus` action: the session whose persisted status is derived. */
interface SessionStatusCommand {
sessionId: string;
}
/** Payload shared by the cancelSession and destroySession commands: the target session id. */
interface SessionControlCommand {
sessionId: string;
}
// Every command queue this actor owns. The same list is used to declare the
// actor's queues and to tell the workflow loop which messages to wait for.
const SANDBOX_INSTANCE_QUEUE_NAMES = [
"sandboxInstance.command.ensure",
"sandboxInstance.command.updateHealth",
"sandboxInstance.command.destroy",
"sandboxInstance.command.createSession",
"sandboxInstance.command.sendPrompt",
"sandboxInstance.command.cancelSession",
"sandboxInstance.command.destroySession",
] as const;
// Union of the literal queue names above.
type SandboxInstanceQueueName = (typeof SANDBOX_INSTANCE_QUEUE_NAMES)[number];
/**
 * Identity helper: returns `name` unchanged. Its only effect is compile-time —
 * the argument must be one of the declared sandbox-instance queue names.
 */
function sandboxInstanceWorkflowQueueName(name: SandboxInstanceQueueName): SandboxInstanceQueueName {
return name;
}
/**
 * Builds a sandbox-agent client for this actor: resolves the connection via
 * `loadAgentConfig` and attaches a persistence driver backed by the actor's db.
 *
 * @param c Actor context exposing `db` and `state`.
 * @returns Whatever `driver.sandboxAgent.createClient` returns for that connection.
 */
async function getSandboxAgentClient(c: any) {
  const runtime = getActorRuntimeContext();
  const persist = new SandboxInstancePersistDriver(c.db);
  const connection = await loadAgentConfig(c);
  return runtime.driver.sandboxAgent.createClient({
    endpoint: connection.endpoint,
    token: connection.token,
    persist,
  });
}
/**
 * Fetches the current process list from the sandbox-agent and broadcasts it
 * as a "processesUpdated" event.
 *
 * @param c Actor context exposing `broadcast`.
 */
async function broadcastProcessesUpdated(c: any): Promise<void> {
  const agent = await getSandboxAgentClient(c);
  const listing = await agent.listProcesses();
  c.broadcast("processesUpdated", {
    type: "processesUpdated",
    processes: listing.processes,
  });
}
/**
 * Upserts the single sandbox-instance row with the command's status and
 * metadata. The stored metadata JSON is the command metadata plus
 * `agentEndpoint`/`agentToken`, each set to null when the command omits it.
 *
 * @param c       Actor context exposing `db`.
 * @param command Desired sandbox status, metadata and agent connection.
 */
async function ensureSandboxMutation(c: any, command: EnsureSandboxCommand): Promise<void> {
  const updatedAt = Date.now();
  const metadataJson = stringifyJson({
    ...command.metadata,
    agentEndpoint: command.agentEndpoint ?? null,
    agentToken: command.agentToken ?? null,
  });
  const status = command.status;

  // Same column values whether the row is inserted or already exists.
  await c.db
    .insert(sandboxInstanceTable)
    .values({ id: SANDBOX_ROW_ID, metadataJson, status, updatedAt })
    .onConflictDoUpdate({
      target: sandboxInstanceTable.id,
      set: { metadataJson, status, updatedAt },
    })
    .run();
}
/**
 * Overwrites the row's status with `"<status>:<message>"` and bumps `updatedAt`.
 * This is an UPDATE only, so nothing is written if the row does not exist.
 *
 * @param c       Actor context exposing `db`.
 * @param command Health status and message to record.
 */
async function updateHealthMutation(c: any, command: HealthSandboxCommand): Promise<void> {
  const { status, message } = command;
  const changes = {
    status: `${status}:${message}`,
    updatedAt: Date.now(),
  };
  await c.db.update(sandboxInstanceTable).set(changes).where(eq(sandboxInstanceTable.id, SANDBOX_ROW_ID)).run();
}
/**
 * Deletes the persisted sandbox-instance row. Only the local record is
 * removed here; no provider call is made by this function.
 *
 * @param c Actor context exposing `db`.
 */
async function destroySandboxMutation(c: any): Promise<void> {
  const removal = c.db.delete(sandboxInstanceTable).where(eq(sandboxInstanceTable.id, SANDBOX_ROW_ID));
  await removal.run();
}
/**
 * Creates a session on the sandbox-agent, retrying transient failures.
 *
 * Up to CREATE_SESSION_MAX_ATTEMPTS attempts are made. A failure is retried
 * only when `isTransientSessionCreateError` accepts its message and attempts
 * remain; the wait before the next try is CREATE_SESSION_RETRY_BASE_MS times
 * the attempt number. Client construction happens inside each attempt, so
 * connection-resolution failures are subject to the same policy.
 *
 * Never throws for createSession failures: they are reported in the result.
 *
 * @param c       Actor context.
 * @param command Prompt, working directory and agent for the new session.
 * @returns The created session's id/status, or `{ id: null, status: "error", error }`.
 */
async function createSessionMutation(c: any, command: CreateSessionCommand): Promise<CreateSessionResult> {
  let attemptsMade = 0;
  let lastDetail = "sandbox-agent createSession failed";

  while (attemptsMade < CREATE_SESSION_MAX_ATTEMPTS) {
    attemptsMade += 1;
    try {
      const client = await getSandboxAgentClient(c);
      const { id, status } = await client.createSession({
        prompt: command.prompt,
        cwd: command.cwd,
        agent: command.agent,
      });
      return { id, status };
    } catch (error) {
      lastDetail = error instanceof Error ? error.message : String(error);

      const attemptsRemain = attemptsMade < CREATE_SESSION_MAX_ATTEMPTS;
      if (!attemptsRemain || !isTransientSessionCreateError(lastDetail)) {
        break;
      }

      const waitMs = CREATE_SESSION_RETRY_BASE_MS * attemptsMade;
      logActorWarning("sandbox-instance", "createSession transient failure; retrying", {
        workspaceId: c.state.workspaceId,
        providerId: c.state.providerId,
        sandboxId: c.state.sandboxId,
        attempt: attemptsMade,
        maxAttempts: CREATE_SESSION_MAX_ATTEMPTS,
        waitMs,
        error: lastDetail,
      });
      await delay(waitMs);
    }
  }

  const attemptLabel = attemptsMade === 1 ? "attempt" : "attempts";
  return {
    id: null,
    status: "error",
    error: `sandbox-agent createSession failed after ${attemptsMade} ${attemptLabel}: ${lastDetail}`,
  };
}
/**
 * Forwards a prompt to an existing session through the sandbox-agent client.
 * Errors from the client propagate to the caller.
 *
 * @param c       Actor context.
 * @param command Target session, prompt text and optional notification flag.
 */
async function sendPromptMutation(c: any, command: SendPromptCommand): Promise<void> {
  const agent = await getSandboxAgentClient(c);
  const { sessionId, prompt, notification } = command;
  await agent.sendPrompt({ sessionId, prompt, notification });
}
/**
 * Asks the sandbox-agent to cancel the given session.
 *
 * @param c       Actor context.
 * @param command Identifies the session to cancel.
 */
async function cancelSessionMutation(c: any, command: SessionControlCommand): Promise<void> {
  const agent = await getSandboxAgentClient(c);
  const { sessionId } = command;
  await agent.cancelSession(sessionId);
}
/**
 * Asks the sandbox-agent to destroy the given session.
 *
 * @param c       Actor context.
 * @param command Identifies the session to destroy.
 */
async function destroySessionMutation(c: any, command: SessionControlCommand): Promise<void> {
  const agent = await getSandboxAgentClient(c);
  const { sessionId } = command;
  await agent.destroySession(sessionId);
}
/**
 * Workflow body of the sandbox-instance actor: an endless loop that takes one
 * command message at a time from the actor's queues, runs the matching
 * mutation inside a named workflow step, and then completes the message.
 *
 * createSession completes with the mutation's result; every other command
 * completes with `{ ok: true }`. Messages with an unrecognised name are
 * skipped without being completed.
 *
 * @param ctx Workflow context provided by rivetkit.
 */
async function runSandboxInstanceWorkflow(ctx: any): Promise<void> {
  await ctx.loop("sandbox-instance-command-loop", async (loopCtx: any) => {
    const msg = await loopCtx.queue.next("next-sandbox-instance-command", {
      names: [...SANDBOX_INSTANCE_QUEUE_NAMES],
      completable: true,
    });
    if (!msg) {
      return Loop.continue(undefined);
    }

    switch (msg.name) {
      case "sandboxInstance.command.ensure": {
        await loopCtx.step("sandbox-instance-ensure", async () => ensureSandboxMutation(loopCtx, msg.body as EnsureSandboxCommand));
        await msg.complete({ ok: true });
        break;
      }
      case "sandboxInstance.command.updateHealth": {
        await loopCtx.step("sandbox-instance-update-health", async () => updateHealthMutation(loopCtx, msg.body as HealthSandboxCommand));
        await msg.complete({ ok: true });
        break;
      }
      case "sandboxInstance.command.destroy": {
        await loopCtx.step("sandbox-instance-destroy", async () => destroySandboxMutation(loopCtx));
        await msg.complete({ ok: true });
        break;
      }
      case "sandboxInstance.command.createSession": {
        // The only step with an explicit timeout; its result is the reply to the sender.
        const result = await loopCtx.step({
          name: "sandbox-instance-create-session",
          timeout: CREATE_SESSION_STEP_TIMEOUT_MS,
          run: async () => createSessionMutation(loopCtx, msg.body as CreateSessionCommand),
        });
        await msg.complete(result);
        break;
      }
      case "sandboxInstance.command.sendPrompt": {
        await loopCtx.step("sandbox-instance-send-prompt", async () => sendPromptMutation(loopCtx, msg.body as SendPromptCommand));
        await msg.complete({ ok: true });
        break;
      }
      case "sandboxInstance.command.cancelSession": {
        await loopCtx.step("sandbox-instance-cancel-session", async () => cancelSessionMutation(loopCtx, msg.body as SessionControlCommand));
        await msg.complete({ ok: true });
        break;
      }
      case "sandboxInstance.command.destroySession": {
        await loopCtx.step("sandbox-instance-destroy-session", async () => destroySessionMutation(loopCtx, msg.body as SessionControlCommand));
        await msg.complete({ ok: true });
        break;
      }
      default:
        break;
    }

    return Loop.continue(undefined);
  });
}
/**
 * Actor representing one sandbox of one provider inside a workspace.
 *
 * Two kinds of actions are exposed:
 * - direct actions (process management, listings, status reads) that talk to
 *   the sandbox-agent client or the persisted store immediately;
 * - command actions (ensure, updateHealth, destroy, createSession, sendPrompt,
 *   cancelSession, destroySession) that enqueue a message on the actor's own
 *   queue and wait for the workflow loop to process it.
 */
export const sandboxInstance = actor({
db: sandboxInstanceDb,
// One queue per entry in SANDBOX_INSTANCE_QUEUE_NAMES.
queues: Object.fromEntries(SANDBOX_INSTANCE_QUEUE_NAMES.map((name) => [name, queue()])),
options: {
name: "Sandbox Instance",
icon: "box",
// 5 minutes (presumably milliseconds, matching the other *_MS values in this file).
actionTimeout: 5 * 60_000,
},
// Actor state is exactly the identity triple supplied at creation.
createState: (_c, input: SandboxInstanceInput) => ({
workspaceId: input.workspaceId,
providerId: input.providerId,
sandboxId: input.sandboxId,
}),
actions: {
// Returns the connection chosen by loadAgentConfig for this sandbox.
async sandboxAgentConnection(c: any): Promise<SandboxAgentConnection> {
return await loadAgentConfig(c);
},
// Creates a process via the sandbox-agent, then broadcasts the refreshed process list.
async createProcess(c: any, request: ProcessCreateRequest): Promise<ProcessInfo> {
const client = await getSandboxAgentClient(c);
const created = await client.createProcess(request);
await broadcastProcessesUpdated(c);
return created;
},
// Lists processes straight from the sandbox-agent (no broadcast).
async listProcesses(c: any): Promise<{ processes: ProcessInfo[] }> {
const client = await getSandboxAgentClient(c);
return await client.listProcesses();
},
// Fetches logs for one process; `query` is passed through unchanged.
async getProcessLogs(c: any, request: { processId: string; query?: ProcessLogFollowQuery }): Promise<ProcessLogsResponse> {
const client = await getSandboxAgentClient(c);
return await client.getProcessLogs(request.processId, request.query);
},
// Stops a process, then broadcasts the refreshed process list.
async stopProcess(c: any, request: { processId: string; query?: ProcessSignalQuery }): Promise<ProcessInfo> {
const client = await getSandboxAgentClient(c);
const stopped = await client.stopProcess(request.processId, request.query);
await broadcastProcessesUpdated(c);
return stopped;
},
// Kills a process, then broadcasts the refreshed process list.
async killProcess(c: any, request: { processId: string; query?: ProcessSignalQuery }): Promise<ProcessInfo> {
const client = await getSandboxAgentClient(c);
const killed = await client.killProcess(request.processId, request.query);
await broadcastProcessesUpdated(c);
return killed;
},
// Deletes a process record, then broadcasts the refreshed process list.
async deleteProcess(c: any, request: { processId: string }): Promise<void> {
const client = await getSandboxAgentClient(c);
await client.deleteProcess(request.processId);
await broadcastProcessesUpdated(c);
},
// Reports the provider-side sandbox state. Only Daytona is actually queried;
// every other provider reports "unknown". `at` is captured before any remote call.
async providerState(c: any): Promise<{ providerId: ProviderId; sandboxId: string; state: string; at: number }> {
const at = Date.now();
const { config, driver } = getActorRuntimeContext();
if (c.state.providerId === "daytona") {
const daytona = driver.daytona.createClient({
apiUrl: config.providers.daytona.endpoint,
apiKey: config.providers.daytona.apiKey,
});
const sandbox = await daytona.getSandbox(c.state.sandboxId);
const state = String(sandbox.state ?? "unknown").toLowerCase();
return { providerId: c.state.providerId, sandboxId: c.state.sandboxId, state, at };
}
return {
providerId: c.state.providerId,
sandboxId: c.state.sandboxId,
state: "unknown",
at,
};
},
// Queues an ensure command on this actor and waits (up to 60_000) for the workflow to complete it.
async ensure(c, command: EnsureSandboxCommand): Promise<void> {
const self = selfSandboxInstance(c);
await self.send(sandboxInstanceWorkflowQueueName("sandboxInstance.command.ensure"), command, {
wait: true,
timeout: 60_000,
});
},
// Queues a health update and waits (up to 60_000) for completion.
async updateHealth(c, command: HealthSandboxCommand): Promise<void> {
const self = selfSandboxInstance(c);
await self.send(sandboxInstanceWorkflowQueueName("sandboxInstance.command.updateHealth"), command, {
wait: true,
timeout: 60_000,
});
},
// Queues a destroy command (empty payload) and waits (up to 60_000) for completion.
async destroy(c): Promise<void> {
const self = selfSandboxInstance(c);
await self.send(
sandboxInstanceWorkflowQueueName("sandboxInstance.command.destroy"),
{},
{
wait: true,
timeout: 60_000,
},
);
},
// Queues a createSession command and returns the workflow's reply.
// NOTE(review): this wait is 5 * 60_000 while the workflow step is allowed
// CREATE_SESSION_STEP_TIMEOUT_MS (10 * 60_000), so the caller can give up
// before the step does — confirm this is intended.
async createSession(c: any, command: CreateSessionCommand): Promise<CreateSessionResult> {
const self = selfSandboxInstance(c);
return expectQueueResponse<CreateSessionResult>(
await self.send(sandboxInstanceWorkflowQueueName("sandboxInstance.command.createSession"), command, {
wait: true,
timeout: 5 * 60_000,
}),
);
},
// Lists sessions from the sandbox-agent; on any failure logs a warning and
// serves the same page request from the persisted store instead.
async listSessions(c: any, command?: ListSessionsCommand): Promise<{ items: SessionRecord[]; nextCursor?: string }> {
const persist = new SandboxInstancePersistDriver(c.db);
try {
const client = await getSandboxAgentClient(c);
const page = await client.listSessions({
cursor: command?.cursor,
limit: command?.limit,
});
return {
items: page.items,
nextCursor: page.nextCursor,
};
} catch (error) {
logActorWarning("sandbox-instance", "listSessions remote read failed; using persisted fallback", {
workspaceId: c.state.workspaceId,
providerId: c.state.providerId,
sandboxId: c.state.sandboxId,
error: resolveErrorMessage(error),
});
return await persist.listSessions({
cursor: command?.cursor,
limit: command?.limit,
});
}
},
// Reads session events from the persisted store only; the sandbox-agent is not contacted.
async listSessionEvents(c: any, command: ListSessionEventsCommand): Promise<{ items: SessionEvent[]; nextCursor?: string }> {
const persist = new SandboxInstancePersistDriver(c.db);
return await persist.listEvents({
sessionId: command.sessionId,
cursor: command.cursor,
limit: command.limit,
});
},
// Queues a sendPrompt command and waits (up to 5 * 60_000) for completion.
async sendPrompt(c, command: SendPromptCommand): Promise<void> {
const self = selfSandboxInstance(c);
await self.send(sandboxInstanceWorkflowQueueName("sandboxInstance.command.sendPrompt"), command, {
wait: true,
timeout: 5 * 60_000,
});
},
// Queues a cancelSession command and waits (up to 60_000) for completion.
async cancelSession(c, command: SessionControlCommand): Promise<void> {
const self = selfSandboxInstance(c);
await self.send(sandboxInstanceWorkflowQueueName("sandboxInstance.command.cancelSession"), command, {
wait: true,
timeout: 60_000,
});
},
// Queues a destroySession command and waits (up to 60_000) for completion.
async destroySession(c, command: SessionControlCommand): Promise<void> {
const self = selfSandboxInstance(c);
await self.send(sandboxInstanceWorkflowQueueName("sandboxInstance.command.destroySession"), command, {
wait: true,
timeout: 60_000,
});
},
// Derives the session status from persisted session/event records only.
async sessionStatus(c, command: SessionStatusCommand): Promise<{ id: string; status: "running" | "idle" | "error" }> {
return await derivePersistedSessionStatus(new SandboxInstancePersistDriver(c.db), command.sessionId);
},
},
// The command loop that drains the queues declared above.
run: workflow(runSandboxInstanceWorkflow),
});