fix(foundry): fix runner version

This commit is contained in:
Nathan Flurry 2026-03-17 14:33:13 -07:00
parent f25a92aca8
commit ffb9f1082b
10 changed files with 114 additions and 6 deletions

View file

@ -72,6 +72,26 @@ Local Docker sandboxes use the `rivetdev/sandbox-agent:foundry-base-latest` imag
- The image must be built with `--platform linux/amd64`. The Rust build is memory-intensive; Docker Desktop needs at least 8GB RAM allocated.
- When updating the base image contents (new system packages, agent versions), rebuild and push with the publish script, then update the `foundry-base-latest` tag.
## Production GitHub App + OAuth App
Foundry uses two separate GitHub entities in production:
- **OAuth App** (`GITHUB_CLIENT_ID` / `GITHUB_CLIENT_SECRET`) — handles "Sign in with GitHub" via Better Auth. This is a standard OAuth App.
- **GitHub App** (`GITHUB_APP_ID` / `GITHUB_APP_CLIENT_ID` / `GITHUB_APP_CLIENT_SECRET` / `GITHUB_APP_PRIVATE_KEY`) — handles webhooks, installation tokens for repo access, and GitHub API sync (repos, PRs). Must be manually installed on each org.
Key env vars and where they connect:
- `GITHUB_REDIRECT_URI` — OAuth callback, must point to `https://api.sandboxagent.dev/v1/auth/callback/github`
- `GITHUB_WEBHOOK_SECRET` — must match the secret configured on the GitHub App's Webhook settings page exactly. Mismatches cause silent 500s on webhook delivery (signature verification fails inside the actor, surfaced as a generic RivetKit `internal_error`).
- `BETTER_AUTH_URL` — must be the **API** URL (`https://api.sandboxagent.dev`), not the frontend URL. Better Auth uses this internally for sign-out and session management calls.
- `APP_URL` — the **frontend** URL (`https://foundry.sandboxagent.dev`).
Troubleshooting:
- **"GitHub App not installed"** — The GitHub App must be manually installed on each org. Sign-in does not auto-install it. Go to the GitHub App settings → Install App tab. The sign-in flow can only detect existing installations, not create them.
- **Webhooks not arriving** — Check the GitHub App → Advanced tab for delivery history. If deliveries show 500, the webhook secret likely doesn't match `GITHUB_WEBHOOK_SECRET`. To test, compute the signature with `echo -n '{"test":true}' | openssl dgst -sha256 -hmac "$SECRET"`, then curl the webhook endpoint with that exact body and the digest in the `X-Hub-Signature-256: sha256=<digest>` header.
- **Deleting all actors wipes GitHub App installation state.** After a full actor reset, you must trigger a webhook (e.g. redeliver from GitHub App Advanced tab, or re-install the app) to repopulate installation records.
## Railway Logs
- Production Foundry Railway logs can be read from a linked checkout with `railway logs --deployment --lines 200` or `railway logs <deployment-id> --deployment --lines 200`.

View file

@ -19,6 +19,7 @@ RUN pnpm --filter @sandbox-agent/foundry-backend deploy --prod /out
FROM oven/bun:1.2 AS runtime
ENV NODE_ENV=production
ENV HOME=/home/task
ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version
WORKDIR /app
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
@ -31,6 +32,8 @@ RUN addgroup --system --gid 1001 task \
&& adduser --system --uid 1001 --home /home/task --ingroup task task \
&& mkdir -p /home/task \
&& chown -R task:task /home/task /app
# Write the current Unix time in seconds to /etc/foundry/rivet-runner-version,
# the same path RIVET_RUNNER_VERSION_FILE is set to above.
# NOTE(review): `date` runs when this layer is built; if Docker reuses a cached
# layer the stored value will not change — confirm that is acceptable here.
RUN mkdir -p /etc/foundry \
&& date +%s > /etc/foundry/rivet-runner-version
COPY --from=build /out ./
USER task
EXPOSE 7741

View file

@ -21,6 +21,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION
ENV PATH="/root/.local/bin:${PATH}"
ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent"
ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version
# Write the current Unix time in seconds to /etc/foundry/rivet-runner-version,
# the same path RIVET_RUNNER_VERSION_FILE is set to above.
# NOTE(review): `date` runs when this layer is built; if Docker reuses a cached
# layer the stored value will not change — confirm that is acceptable here.
RUN mkdir -p /etc/foundry \
&& date +%s > /etc/foundry/rivet-runner-version
WORKDIR /app

View file

@ -20,6 +20,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION
ENV PATH="/root/.local/bin:${PATH}"
ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent"
ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version
# Write the current Unix time in seconds to /etc/foundry/rivet-runner-version,
# the same path RIVET_RUNNER_VERSION_FILE is set to above.
# NOTE(review): `date` runs when this layer is built; if Docker reuses a cached
# layer the stored value will not change — confirm that is acceptable here.
RUN mkdir -p /etc/foundry \
&& date +%s > /etc/foundry/rivet-runner-version
WORKDIR /workspace/quebec

View file

@ -6,16 +6,15 @@ import { auditLog } from "./audit-log/index.js";
import { taskSandbox } from "./sandbox/index.js";
import { organization } from "./organization/index.js";
import { logger } from "../logging.js";
import { resolveRunnerVersion } from "../config/runner-version.js";
const RUNNER_VERSION = Math.floor(Date.now() / 1000);
const runnerVersion = resolveRunnerVersion();
export const registry = setup({
serverless: {
basePath: "/v1/rivet",
},
runner: {
version: RUNNER_VERSION,
},
runner: { version: runnerVersion },
logging: {
baseLogger: logger,
},

View file

@ -22,6 +22,16 @@ export function resolveErrorStack(error: unknown): string | undefined {
return undefined;
}
/**
 * Emits an info-level log entry for an actor.
 *
 * @param scope - Identifier of the emitting area; logged as the `scope` field.
 * @param message - Log message passed to the logger.
 * @param context - Optional extra fields merged into the entry after `scope`.
 */
export function logActorInfo(scope: string, message: string, context?: Record<string, unknown>): void {
  // Spreading an undefined `context` contributes no properties, so no fallback object is needed.
  const fields = { scope, ...context };
  logger.info(fields, message);
}
export function logActorWarning(scope: string, message: string, context?: Record<string, unknown>): void {
logger.warn(
{

View file

@ -10,7 +10,7 @@ import {
} from "@sandbox-agent/foundry-shared";
import { getActorRuntimeContext } from "../context.js";
import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js";
import { logActorWarning, resolveErrorMessage } from "../logging.js";
import { logActorInfo, logActorWarning, resolveErrorMessage } from "../logging.js";
import { SANDBOX_REPO_CWD } from "../sandbox/index.js";
import { resolveSandboxProviderId } from "../../sandbox-config.js";
import { getBetterAuthService } from "../../services/better-auth.js";
@ -636,11 +636,17 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski
// If the repo was already prepared and the caller allows skipping fetch, just return.
// The clone, fetch, and checkout already happened on a prior call.
if (opts?.skipFetchIfPrepared && sandboxRepoPrepared) {
logActorInfo("task.sandbox", "ensureSandboxRepo skipped (already prepared)");
return;
}
const repoStart = performance.now();
const t0 = performance.now();
const auth = await resolveOrganizationGithubAuth(c, c.state.organizationId);
const metadata = await getRepositoryMetadata(c);
logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) });
const baseRef = metadata.defaultBranch ?? "main";
const sandboxRepoRoot = dirname(SANDBOX_REPO_CWD);
const script = [
@ -657,6 +663,8 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski
)}; else target_ref=${JSON.stringify(baseRef)}; fi`,
`git checkout -B ${JSON.stringify(record.branchName)} \"$target_ref\"`,
];
const t1 = performance.now();
const result = await sandbox.runProcess({
command: "bash",
args: ["-lc", script.join("; ")],
@ -669,6 +677,11 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski
: undefined,
timeoutMs: 5 * 60_000,
});
logActorInfo("task.sandbox", "git clone/fetch/checkout", {
branch: record.branchName,
repo: metadata.remoteUrl,
durationMs: Math.round(performance.now() - t1),
});
if ((result.exitCode ?? 0) !== 0) {
throw new Error(`sandbox repo preparation failed (${result.exitCode ?? 1}): ${[result.stdout, result.stderr].filter(Boolean).join("")}`);
@ -677,10 +690,13 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski
// On first repo preparation, inject the task owner's git credentials into the sandbox
// so that push/commit operations are authenticated and attributed to the correct user.
if (!sandboxRepoPrepared && opts?.authSessionId) {
const t2 = performance.now();
await maybeSwapTaskOwner(c, opts.authSessionId, sandbox);
logActorInfo("task.sandbox", "maybeSwapTaskOwner", { durationMs: Math.round(performance.now() - t2) });
}
sandboxRepoPrepared = true;
logActorInfo("task.sandbox", "ensureSandboxRepo complete", { totalDurationMs: Math.round(performance.now() - repoStart) });
}
async function executeInSandbox(
@ -1264,6 +1280,7 @@ export async function createWorkspaceSession(c: any, model?: string, authSession
}
export async function ensureWorkspaceSession(c: any, sessionId: string, model?: string, authSessionId?: string): Promise<void> {
const ensureStart = performance.now();
const meta = await readSessionMeta(c, sessionId);
if (!meta || meta.closed) {
return;
@ -1283,10 +1300,18 @@ export async function ensureWorkspaceSession(c: any, sessionId: string, model?:
});
try {
const t0 = performance.now();
const runtime = await getTaskSandboxRuntime(c, record);
logActorInfo("task.session", "getTaskSandboxRuntime", { sessionId, durationMs: Math.round(performance.now() - t0) });
const t1 = performance.now();
await ensureSandboxRepo(c, runtime.sandbox, record);
logActorInfo("task.session", "ensureSandboxRepo", { sessionId, durationMs: Math.round(performance.now() - t1) });
const resolvedModel = model ?? meta.model ?? (await resolveDefaultModel(c, authSessionId));
const resolvedAgent = await resolveSandboxAgentForModel(c, resolvedModel);
const t2 = performance.now();
await runtime.sandbox.createSession({
id: meta.sandboxSessionId ?? sessionId,
agent: resolvedAgent,
@ -1295,12 +1320,14 @@ export async function ensureWorkspaceSession(c: any, sessionId: string, model?:
cwd: runtime.cwd,
},
});
logActorInfo("task.session", "createSession", { sessionId, agent: resolvedAgent, model: resolvedModel, durationMs: Math.round(performance.now() - t2) });
await updateSessionMeta(c, sessionId, {
sandboxSessionId: meta.sandboxSessionId ?? sessionId,
status: "ready",
errorMessage: null,
});
logActorInfo("task.session", "ensureWorkspaceSession complete", { sessionId, totalDurationMs: Math.round(performance.now() - ensureStart) });
fireRefreshSessionTranscript(c, meta.sandboxSessionId ?? sessionId);
} catch (error) {
await updateSessionMeta(c, sessionId, {
@ -1415,12 +1442,19 @@ export async function changeWorkspaceModel(c: any, sessionId: string, model: str
}
export async function sendWorkspaceMessage(c: any, sessionId: string, text: string, attachments: Array<any>, authSessionId?: string): Promise<void> {
const sendStart = performance.now();
const meta = requireSendableSessionMeta(await readSessionMeta(c, sessionId), sessionId);
const record = await ensureWorkspaceSeeded(c);
const t0 = performance.now();
const runtime = await getTaskSandboxRuntime(c, record);
logActorInfo("task.message", "getTaskSandboxRuntime", { sessionId, durationMs: Math.round(performance.now() - t0) });
const t1 = performance.now();
// Skip git fetch on subsequent messages — the repo was already prepared during session
// creation. This avoids a 5-30s network round-trip to GitHub on every prompt.
await ensureSandboxRepo(c, runtime.sandbox, record, { skipFetchIfPrepared: true, authSessionId });
logActorInfo("task.message", "ensureSandboxRepo", { sessionId, durationMs: Math.round(performance.now() - t1) });
// Check if the task owner needs to swap. If a different user is sending this message,
// update the owner record and inject their git credentials into the sandbox.
@ -1450,10 +1484,12 @@ export async function sendWorkspaceMessage(c: any, sessionId: string, text: stri
await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "running", Date.now());
try {
const t2 = performance.now();
await runtime.sandbox.sendPrompt({
sessionId: meta.sandboxSessionId,
prompt: prompt.join("\n\n"),
});
logActorInfo("task.message", "sendPrompt", { sessionId, durationMs: Math.round(performance.now() - t2) });
await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "idle", Date.now());
} catch (error) {
await updateSessionMeta(c, sessionId, {
@ -1463,6 +1499,7 @@ export async function sendWorkspaceMessage(c: any, sessionId: string, text: stri
await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "error", Date.now());
throw error;
}
logActorInfo("task.message", "sendWorkspaceMessage complete", { sessionId, totalDurationMs: Math.round(performance.now() - sendStart) });
}
export async function stopWorkspaceSession(c: any, sessionId: string): Promise<void> {

View file

@ -0,0 +1,33 @@
import { readFileSync } from "node:fs";
/**
 * Parses a runner version from raw text such as an env var value or file contents.
 *
 * Leading and trailing whitespace is ignored. The remaining text must consist
 * entirely of an optionally signed run of decimal digits; anything else is
 * treated as "no version" rather than being partially parsed.
 *
 * @param rawValue - Raw text to parse; may be undefined.
 * @returns The parsed integer, or `undefined` when the input is missing, blank,
 *   not a whole decimal integer, or too large to be represented exactly.
 */
function parseRunnerVersion(rawValue: string | undefined): number | undefined {
  const value = rawValue?.trim();
  if (!value) {
    return undefined;
  }

  // Number.parseInt stops at the first non-digit, so "12abc" would become 12 and
  // "1.5" would become 1. Validate the whole string up front so corrupted input
  // is rejected instead of producing a wrong version.
  if (!/^[+-]?\d+$/.test(value)) {
    return undefined;
  }

  const parsed = Number.parseInt(value, 10);
  // Digit runs beyond Number.MAX_SAFE_INTEGER lose precision; reject them.
  return Number.isSafeInteger(parsed) ? parsed : undefined;
}
/**
 * Determines the runner version from the process environment.
 *
 * Lookup order:
 * 1. `RIVET_RUNNER_VERSION`, when it parses to a number.
 * 2. The contents of the file named by `RIVET_RUNNER_VERSION_FILE`.
 *
 * @returns The resolved version, or `undefined` when neither source yields one
 *   (including when the version file cannot be read).
 */
export function resolveRunnerVersion(): number | undefined {
  const fromEnv = parseRunnerVersion(process.env.RIVET_RUNNER_VERSION);
  const filePath = process.env.RIVET_RUNNER_VERSION_FILE;

  // Only consult the file when the env var gave nothing and a path is configured.
  if (fromEnv === undefined && filePath) {
    try {
      return parseRunnerVersion(readFileSync(filePath, "utf8"));
    } catch {
      // Best effort: an unreadable or missing file means "no version".
      return undefined;
    }
  }

  return fromEnv;
}

View file

@ -187,6 +187,7 @@ function toTaskModel(
diffs: detail?.diffs ?? {},
fileTree: detail?.fileTree ?? [],
minutesUsed: detail?.minutesUsed ?? 0,
sandboxes: detail?.sandboxes ?? [],
activeSandboxId: detail?.activeSandboxId ?? null,
primaryUserLogin: detail?.primaryUserLogin ?? summary.primaryUserLogin ?? null,
primaryUserAvatarUrl: detail?.primaryUserAvatarUrl ?? summary.primaryUserAvatarUrl ?? null,

View file

@ -186,4 +186,3 @@ foundry-format:
[group('foundry')]
foundry-docker-build tag='foundry:local':
docker build -f foundry/docker/backend.Dockerfile -t {{tag}} .