From ffb9f1082b3b9efd37c9af65a6b06d68bddb3fe1 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 14:33:13 -0700 Subject: [PATCH] fix(foundry): fix runner version --- foundry/CLAUDE.md | 20 ++++++++++ foundry/docker/backend.Dockerfile | 3 ++ foundry/docker/backend.dev.Dockerfile | 3 ++ foundry/docker/backend.preview.Dockerfile | 3 ++ foundry/packages/backend/src/actors/index.ts | 7 ++-- .../packages/backend/src/actors/logging.ts | 10 +++++ .../backend/src/actors/task/workspace.ts | 39 ++++++++++++++++++- .../backend/src/config/runner-version.ts | 33 ++++++++++++++++ .../frontend/src/components/mock-layout.tsx | 1 + justfile | 1 - 10 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 foundry/packages/backend/src/config/runner-version.ts diff --git a/foundry/CLAUDE.md b/foundry/CLAUDE.md index d48ab59..2d9bcbb 100644 --- a/foundry/CLAUDE.md +++ b/foundry/CLAUDE.md @@ -72,6 +72,26 @@ Local Docker sandboxes use the `rivetdev/sandbox-agent:foundry-base-latest` imag - The image must be built with `--platform linux/amd64`. The Rust build is memory-intensive; Docker Desktop needs at least 8GB RAM allocated. - When updating the base image contents (new system packages, agent versions), rebuild and push with the publish script, then update the `foundry-base-latest` tag. +## Production GitHub App + OAuth App + +Foundry uses two separate GitHub entities in production: + +- **OAuth App** (`GITHUB_CLIENT_ID` / `GITHUB_CLIENT_SECRET`) — handles "Sign in with GitHub" via Better Auth. This is a standard OAuth App. +- **GitHub App** (`GITHUB_APP_ID` / `GITHUB_APP_CLIENT_ID` / `GITHUB_APP_CLIENT_SECRET` / `GITHUB_APP_PRIVATE_KEY`) — handles webhooks, installation tokens for repo access, and GitHub API sync (repos, PRs). Must be manually installed on each org. 
+ +Key env vars and where they connect: + +- `GITHUB_REDIRECT_URI` — OAuth callback, must point to `https://api.sandboxagent.dev/v1/auth/callback/github` +- `GITHUB_WEBHOOK_SECRET` — must match the secret configured on the GitHub App's Webhook settings page exactly. Mismatches cause silent 500s on webhook delivery (signature verification fails inside the actor, surfaced as a generic RivetKit `internal_error`). +- `BETTER_AUTH_URL` — must be the **API** URL (`https://api.sandboxagent.dev`), not the frontend URL. Better Auth uses this internally for sign-out and session management calls. +- `APP_URL` — the **frontend** URL (`https://foundry.sandboxagent.dev`). + +Troubleshooting: + +- **"GitHub App not installed"** — The GitHub App must be manually installed on each org. Sign-in does not auto-install it. Go to the GitHub App settings → Install App tab. The sign-in flow can only detect existing installations, not create them. +- **Webhooks not arriving** — Check the GitHub App → Advanced tab for delivery history. If deliveries show 500, the webhook secret likely doesn't match `GITHUB_WEBHOOK_SECRET`. Test with: `echo -n '{"test":true}' | openssl dgst -sha256 -hmac "$SECRET"` and curl the endpoint with the computed signature. +- **Deleting all actors wipes GitHub App installation state.** After a full actor reset, you must trigger a webhook (e.g. redeliver from GitHub App Advanced tab, or re-install the app) to repopulate installation records. + ## Railway Logs - Production Foundry Railway logs can be read from a linked checkout with `railway logs --deployment --lines 200` or `railway logs <deployment-id> --deployment --lines 200`. 
diff --git a/foundry/docker/backend.Dockerfile b/foundry/docker/backend.Dockerfile index 3dc1c7d..ae14ddf 100644 --- a/foundry/docker/backend.Dockerfile +++ b/foundry/docker/backend.Dockerfile @@ -19,6 +19,7 @@ RUN pnpm --filter @sandbox-agent/foundry-backend deploy --prod /out FROM oven/bun:1.2 AS runtime ENV NODE_ENV=production ENV HOME=/home/task +ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version WORKDIR /app RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -31,6 +32,8 @@ RUN addgroup --system --gid 1001 task \ && adduser --system --uid 1001 --home /home/task --ingroup task task \ && mkdir -p /home/task \ && chown -R task:task /home/task /app +RUN mkdir -p /etc/foundry \ + && date +%s > /etc/foundry/rivet-runner-version COPY --from=build /out ./ USER task EXPOSE 7741 diff --git a/foundry/docker/backend.dev.Dockerfile b/foundry/docker/backend.dev.Dockerfile index 46177c3..c4b6c3a 100644 --- a/foundry/docker/backend.dev.Dockerfile +++ b/foundry/docker/backend.dev.Dockerfile @@ -21,6 +21,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION ENV PATH="/root/.local/bin:${PATH}" ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent" +ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version +RUN mkdir -p /etc/foundry \ + && date +%s > /etc/foundry/rivet-runner-version WORKDIR /app diff --git a/foundry/docker/backend.preview.Dockerfile b/foundry/docker/backend.preview.Dockerfile index b35ced8..91cd7c7 100644 --- a/foundry/docker/backend.preview.Dockerfile +++ b/foundry/docker/backend.preview.Dockerfile @@ -20,6 +20,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION ENV PATH="/root/.local/bin:${PATH}" ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent" +ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version +RUN mkdir -p /etc/foundry \ + && date +%s > /etc/foundry/rivet-runner-version WORKDIR /workspace/quebec diff --git 
a/foundry/packages/backend/src/actors/index.ts b/foundry/packages/backend/src/actors/index.ts index 52bb914..74ede4a 100644 --- a/foundry/packages/backend/src/actors/index.ts +++ b/foundry/packages/backend/src/actors/index.ts @@ -6,16 +6,15 @@ import { auditLog } from "./audit-log/index.js"; import { taskSandbox } from "./sandbox/index.js"; import { organization } from "./organization/index.js"; import { logger } from "../logging.js"; +import { resolveRunnerVersion } from "../config/runner-version.js"; -const RUNNER_VERSION = Math.floor(Date.now() / 1000); +const runnerVersion = resolveRunnerVersion(); export const registry = setup({ serverless: { basePath: "/v1/rivet", }, - runner: { - version: RUNNER_VERSION, - }, + runner: { version: runnerVersion }, logging: { baseLogger: logger, }, diff --git a/foundry/packages/backend/src/actors/logging.ts b/foundry/packages/backend/src/actors/logging.ts index afc7d37..a61685f 100644 --- a/foundry/packages/backend/src/actors/logging.ts +++ b/foundry/packages/backend/src/actors/logging.ts @@ -22,6 +22,16 @@ export function resolveErrorStack(error: unknown): string | undefined { return undefined; } +export function logActorInfo(scope: string, message: string, context?: Record): void { + logger.info( + { + scope, + ...(context ?? 
{}), + }, + message, + ); +} + export function logActorWarning(scope: string, message: string, context?: Record): void { logger.warn( { diff --git a/foundry/packages/backend/src/actors/task/workspace.ts b/foundry/packages/backend/src/actors/task/workspace.ts index 5c49a4d..f7dcc26 100644 --- a/foundry/packages/backend/src/actors/task/workspace.ts +++ b/foundry/packages/backend/src/actors/task/workspace.ts @@ -10,7 +10,7 @@ import { } from "@sandbox-agent/foundry-shared"; import { getActorRuntimeContext } from "../context.js"; import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js"; -import { logActorWarning, resolveErrorMessage } from "../logging.js"; +import { logActorInfo, logActorWarning, resolveErrorMessage } from "../logging.js"; import { SANDBOX_REPO_CWD } from "../sandbox/index.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; import { getBetterAuthService } from "../../services/better-auth.js"; @@ -636,11 +636,17 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski // If the repo was already prepared and the caller allows skipping fetch, just return. // The clone, fetch, and checkout already happened on a prior call. if (opts?.skipFetchIfPrepared && sandboxRepoPrepared) { + logActorInfo("task.sandbox", "ensureSandboxRepo skipped (already prepared)"); return; } + const repoStart = performance.now(); + + const t0 = performance.now(); const auth = await resolveOrganizationGithubAuth(c, c.state.organizationId); const metadata = await getRepositoryMetadata(c); + logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) }); + const baseRef = metadata.defaultBranch ?? 
"main"; const sandboxRepoRoot = dirname(SANDBOX_REPO_CWD); const script = [ @@ -657,6 +663,8 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski )}; else target_ref=${JSON.stringify(baseRef)}; fi`, `git checkout -B ${JSON.stringify(record.branchName)} \"$target_ref\"`, ]; + + const t1 = performance.now(); const result = await sandbox.runProcess({ command: "bash", args: ["-lc", script.join("; ")], @@ -669,6 +677,11 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski : undefined, timeoutMs: 5 * 60_000, }); + logActorInfo("task.sandbox", "git clone/fetch/checkout", { + branch: record.branchName, + repo: metadata.remoteUrl, + durationMs: Math.round(performance.now() - t1), + }); if ((result.exitCode ?? 0) !== 0) { throw new Error(`sandbox repo preparation failed (${result.exitCode ?? 1}): ${[result.stdout, result.stderr].filter(Boolean).join("")}`); @@ -677,10 +690,13 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski // On first repo preparation, inject the task owner's git credentials into the sandbox // so that push/commit operations are authenticated and attributed to the correct user. 
if (!sandboxRepoPrepared && opts?.authSessionId) { + const t2 = performance.now(); await maybeSwapTaskOwner(c, opts.authSessionId, sandbox); + logActorInfo("task.sandbox", "maybeSwapTaskOwner", { durationMs: Math.round(performance.now() - t2) }); } sandboxRepoPrepared = true; + logActorInfo("task.sandbox", "ensureSandboxRepo complete", { totalDurationMs: Math.round(performance.now() - repoStart) }); } async function executeInSandbox( @@ -1264,6 +1280,7 @@ export async function createWorkspaceSession(c: any, model?: string, authSession } export async function ensureWorkspaceSession(c: any, sessionId: string, model?: string, authSessionId?: string): Promise { + const ensureStart = performance.now(); const meta = await readSessionMeta(c, sessionId); if (!meta || meta.closed) { return; @@ -1283,10 +1300,18 @@ export async function ensureWorkspaceSession(c: any, sessionId: string, model?: }); try { + const t0 = performance.now(); const runtime = await getTaskSandboxRuntime(c, record); + logActorInfo("task.session", "getTaskSandboxRuntime", { sessionId, durationMs: Math.round(performance.now() - t0) }); + + const t1 = performance.now(); await ensureSandboxRepo(c, runtime.sandbox, record); + logActorInfo("task.session", "ensureSandboxRepo", { sessionId, durationMs: Math.round(performance.now() - t1) }); + const resolvedModel = model ?? meta.model ?? (await resolveDefaultModel(c, authSessionId)); const resolvedAgent = await resolveSandboxAgentForModel(c, resolvedModel); + + const t2 = performance.now(); await runtime.sandbox.createSession({ id: meta.sandboxSessionId ?? sessionId, agent: resolvedAgent, @@ -1295,12 +1320,14 @@ export async function ensureWorkspaceSession(c: any, sessionId: string, model?: cwd: runtime.cwd, }, }); + logActorInfo("task.session", "createSession", { sessionId, agent: resolvedAgent, model: resolvedModel, durationMs: Math.round(performance.now() - t2) }); await updateSessionMeta(c, sessionId, { sandboxSessionId: meta.sandboxSessionId ?? 
sessionId, status: "ready", errorMessage: null, }); + logActorInfo("task.session", "ensureWorkspaceSession complete", { sessionId, totalDurationMs: Math.round(performance.now() - ensureStart) }); fireRefreshSessionTranscript(c, meta.sandboxSessionId ?? sessionId); } catch (error) { await updateSessionMeta(c, sessionId, { @@ -1415,12 +1442,19 @@ export async function changeWorkspaceModel(c: any, sessionId: string, model: str } export async function sendWorkspaceMessage(c: any, sessionId: string, text: string, attachments: Array, authSessionId?: string): Promise { + const sendStart = performance.now(); const meta = requireSendableSessionMeta(await readSessionMeta(c, sessionId), sessionId); const record = await ensureWorkspaceSeeded(c); + + const t0 = performance.now(); const runtime = await getTaskSandboxRuntime(c, record); + logActorInfo("task.message", "getTaskSandboxRuntime", { sessionId, durationMs: Math.round(performance.now() - t0) }); + + const t1 = performance.now(); // Skip git fetch on subsequent messages — the repo was already prepared during session // creation. This avoids a 5-30s network round-trip to GitHub on every prompt. await ensureSandboxRepo(c, runtime.sandbox, record, { skipFetchIfPrepared: true, authSessionId }); + logActorInfo("task.message", "ensureSandboxRepo", { sessionId, durationMs: Math.round(performance.now() - t1) }); // Check if the task owner needs to swap. If a different user is sending this message, // update the owner record and inject their git credentials into the sandbox. 
@@ -1450,10 +1484,12 @@ export async function sendWorkspaceMessage(c: any, sessionId: string, text: stri await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "running", Date.now()); try { + const t2 = performance.now(); await runtime.sandbox.sendPrompt({ sessionId: meta.sandboxSessionId, prompt: prompt.join("\n\n"), }); + logActorInfo("task.message", "sendPrompt", { sessionId, durationMs: Math.round(performance.now() - t2) }); await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "idle", Date.now()); } catch (error) { await updateSessionMeta(c, sessionId, { @@ -1463,6 +1499,7 @@ export async function sendWorkspaceMessage(c: any, sessionId: string, text: stri await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "error", Date.now()); throw error; } + logActorInfo("task.message", "sendWorkspaceMessage complete", { sessionId, totalDurationMs: Math.round(performance.now() - sendStart) }); } export async function stopWorkspaceSession(c: any, sessionId: string): Promise { diff --git a/foundry/packages/backend/src/config/runner-version.ts b/foundry/packages/backend/src/config/runner-version.ts new file mode 100644 index 0000000..5c33672 --- /dev/null +++ b/foundry/packages/backend/src/config/runner-version.ts @@ -0,0 +1,33 @@ +import { readFileSync } from "node:fs"; + +function parseRunnerVersion(rawValue: string | undefined): number | undefined { + const value = rawValue?.trim(); + if (!value) { + return undefined; + } + + const parsed = Number.parseInt(value, 10); + if (Number.isNaN(parsed)) { + return undefined; + } + + return parsed; +} + +export function resolveRunnerVersion(): number | undefined { + const envVersion = parseRunnerVersion(process.env.RIVET_RUNNER_VERSION); + if (envVersion !== undefined) { + return envVersion; + } + + const versionFilePath = process.env.RIVET_RUNNER_VERSION_FILE; + if (!versionFilePath) { + return undefined; + } + + try { + return parseRunnerVersion(readFileSync(versionFilePath, "utf8")); + } catch { + return 
undefined; + } +} diff --git a/foundry/packages/frontend/src/components/mock-layout.tsx b/foundry/packages/frontend/src/components/mock-layout.tsx index 797b650..4089e01 100644 --- a/foundry/packages/frontend/src/components/mock-layout.tsx +++ b/foundry/packages/frontend/src/components/mock-layout.tsx @@ -187,6 +187,7 @@ function toTaskModel( diffs: detail?.diffs ?? {}, fileTree: detail?.fileTree ?? [], minutesUsed: detail?.minutesUsed ?? 0, + sandboxes: detail?.sandboxes ?? [], activeSandboxId: detail?.activeSandboxId ?? null, primaryUserLogin: detail?.primaryUserLogin ?? summary.primaryUserLogin ?? null, primaryUserAvatarUrl: detail?.primaryUserAvatarUrl ?? summary.primaryUserAvatarUrl ?? null, diff --git a/justfile b/justfile index 84b761f..4ec95bc 100644 --- a/justfile +++ b/justfile @@ -186,4 +186,3 @@ foundry-format: [group('foundry')] foundry-docker-build tag='foundry:local': docker build -f foundry/docker/backend.Dockerfile -t {{tag}} . -