diff --git a/foundry/packages/backend/src/actors/workspace/actions.ts b/foundry/packages/backend/src/actors/workspace/actions.ts index 584b045..b397fb4 100644 --- a/foundry/packages/backend/src/actors/workspace/actions.ts +++ b/foundry/packages/backend/src/actors/workspace/actions.ts @@ -62,7 +62,12 @@ interface RepoOverviewInput { repoId: string; } -const WORKSPACE_QUEUE_NAMES = ["workspace.command.addRepo", "workspace.command.createTask", "workspace.command.refreshProviderProfiles"] as const; +const WORKSPACE_QUEUE_NAMES = [ + "workspace.command.addRepo", + "workspace.command.createTask", + "workspace.command.refreshProviderProfiles", + "workspace.command.syncGithubSession", +] as const; const SANDBOX_AGENT_REPO = "rivet-dev/sandbox-agent"; type WorkspaceQueueName = (typeof WORKSPACE_QUEUE_NAMES)[number]; @@ -366,6 +371,20 @@ export async function runWorkspaceWorkflow(ctx: any): Promise { refreshProviderProfilesMutation(loopCtx, msg.body as RefreshProviderProfilesCommand), ); await msg.complete({ ok: true }); + return Loop.continue(undefined); + } + + if (msg.name === "workspace.command.syncGithubSession") { + await loopCtx.step({ + name: "workspace-sync-github-session", + timeout: 60_000, + run: async () => { + const { syncGithubOrganizations } = await import("./app-shell.js"); + await syncGithubOrganizations(loopCtx, msg.body as { sessionId: string; accessToken: string }); + }, + }); + await msg.complete({ ok: true }); + return Loop.continue(undefined); } return Loop.continue(undefined); diff --git a/foundry/packages/backend/src/actors/workspace/app-shell.ts b/foundry/packages/backend/src/actors/workspace/app-shell.ts index 01b4289..cbb813d 100644 --- a/foundry/packages/backend/src/actors/workspace/app-shell.ts +++ b/foundry/packages/backend/src/actors/workspace/app-shell.ts @@ -11,7 +11,7 @@ import type { UpdateFoundryOrganizationProfileInput, } from "@sandbox-agent/foundry-shared"; import { getActorRuntimeContext } from "../context.js"; -import { getOrCreateWorkspace } from "../handles.js"; +import { getOrCreateWorkspace, selfWorkspace } from "../handles.js"; import { GitHubAppError } from "../../services/app-github.js"; import { repoIdFromRemote, repoLabelFromRemote } from "../../services/repo.js"; import { logger } from "../../logging.js"; @@ -369,11 +369,50 @@ async function safeListInstallations(accessToken: string): Promise { } } -async function syncGithubSessionFromToken(c: any, sessionId: string, accessToken: string): Promise<{ sessionId: string; redirectTo: string }> { +/** + * Fast path: resolve viewer identity, store user + token in the session, + * and return the redirect URL. Does NOT sync organizations — that work is + * deferred to `syncGithubOrganizations` via the workflow queue so the HTTP + * callback can respond before any proxy timeout triggers a retry. + */ +async function initGithubSession(c: any, sessionId: string, accessToken: string, scopes: string[]): Promise<{ sessionId: string; redirectTo: string }> { assertAppWorkspace(c); const { appShell } = getActorRuntimeContext(); + const viewer = await appShell.github.getViewer(accessToken); + const userId = `user-${slugify(viewer.login)}`; + + await updateAppSession(c, sessionId, { + currentUserId: userId, + currentUserName: viewer.name || viewer.login, + currentUserEmail: viewer.email ?? `${viewer.login}@users.noreply.github.com`, + currentUserGithubLogin: viewer.login, + currentUserRoleLabel: "GitHub user", + githubAccessToken: accessToken, + githubScope: scopes.join(","), + oauthState: null, + oauthStateExpiresAt: null, + }); + + return { + sessionId, + redirectTo: `${appShell.appUrl}/organizations?foundrySession=${encodeURIComponent(sessionId)}`, + }; +} + +/** + * Slow path: list GitHub orgs + installations, sync each org workspace, + * and update the session's eligible organization list. Called from the + * workflow queue so it runs in the background after the callback has + * already returned a redirect to the browser. + * + * Also used synchronously by bootstrapAppGithubSession (dev-only) where + * proxy timeouts are not a concern. + */ +export async function syncGithubOrganizations(c: any, input: { sessionId: string; accessToken: string }): Promise { + assertAppWorkspace(c); + const { appShell } = getActorRuntimeContext(); + const { sessionId, accessToken } = input; const session = await requireAppSessionRow(c, sessionId); - const token = { accessToken, scopes: splitScopes(session.githubScope) }; const viewer = await appShell.github.getViewer(accessToken); const organizations = await safeListOrganizations(accessToken); const installations = await safeListInstallations(accessToken); @@ -424,24 +463,23 @@ async function syncGithubSessionFromToken(c: any, sessionId: string, accessToken ? (linkedOrganizationIds[0] ?? null) : null; - await updateAppSession(c, session.id, { - currentUserId: userId, - currentUserName: viewer.name || viewer.login, - currentUserEmail: viewer.email ?? `${viewer.login}@users.noreply.github.com`, - currentUserGithubLogin: viewer.login, - currentUserRoleLabel: "GitHub user", + await updateAppSession(c, sessionId, { eligibleOrganizationIdsJson: encodeEligibleOrganizationIds(linkedOrganizationIds), activeOrganizationId, - githubAccessToken: accessToken, - githubScope: token.scopes.join(","), - oauthState: null, - oauthStateExpiresAt: null, }); +} - return { - sessionId: session.id, - redirectTo: `${appShell.appUrl}/organizations?foundrySession=${encodeURIComponent(session.id)}`, - }; +/** + * Full synchronous sync: init session + sync orgs in one call. + * Used by bootstrapAppGithubSession (dev-only) where there is no proxy + * timeout concern and we want the session fully populated before returning. + */ +async function syncGithubSessionFromToken(c: any, sessionId: string, accessToken: string): Promise<{ sessionId: string; redirectTo: string }> { + const session = await requireAppSessionRow(c, sessionId); + const scopes = splitScopes(session.githubScope); + const result = await initGithubSession(c, sessionId, accessToken, scopes); + await syncGithubOrganizations(c, { sessionId, accessToken }); + return result; } async function readOrganizationProfileRow(c: any) { @@ -626,14 +664,9 @@ export const workspaceAppActions = { throw new Error("GitHub OAuth state is invalid or expired"); } - // HACK: Clear the oauth state before exchangeCode so that duplicate - // callback requests (e.g. user refresh during the slow sync) fail the - // state check above instead of attempting a second code exchange. - // GitHub OAuth codes are single-use; a second exchangeCode always - // returns bad_verification_code. The root cause of duplicate requests - // is unknown — actor RPCs serialize, appWorkspaceAction does not retry, - // yet production logs show two HTTP requests with different request IDs - // hitting /v1/auth/github/callback with the same code+state ~9s apart. + // Clear state before exchangeCode — GitHub codes are single-use and + // duplicate callback requests (from proxy retries or user refresh) + // must fail the state check rather than attempt a second exchange. // See research/friction/general.mdx 2026-03-13 entry. await updateAppSession(c, session.id, { oauthState: null, @@ -641,10 +674,27 @@ export const workspaceAppActions = { }); const token = await appShell.github.exchangeCode(input.code); - await updateAppSession(c, session.id, { - githubScope: token.scopes.join(","), - }); - return await syncGithubSessionFromToken(c, session.id, token.accessToken); + + // Fast path: store token + user identity and return the redirect + // immediately. The slow org sync (list orgs, list installations, + // sync each workspace) runs in the workflow queue so the HTTP + // response lands before any proxy/infra timeout triggers a retry. + // The frontend already polls when it sees syncStatus === "syncing". + const result = await initGithubSession(c, session.id, token.accessToken, token.scopes); + + // Enqueue the slow org sync to the workflow. fire-and-forget (wait: false) + // because the redirect does not depend on org data — the frontend will + // poll getAppSnapshot until organizations are populated. + const self = selfWorkspace(c); + await self.send( + "workspace.command.syncGithubSession", + { sessionId: session.id, accessToken: token.accessToken }, + { + wait: false, + }, + ); + + return result; }, async bootstrapAppGithubSession(c: any, input: { accessToken: string; sessionId?: string | null }): Promise<{ sessionId: string; redirectTo: string }> { diff --git a/foundry/research/friction/general.mdx b/foundry/research/friction/general.mdx index de19cab..b152287 100644 --- a/foundry/research/friction/general.mdx +++ b/foundry/research/friction/general.mdx @@ -15,13 +15,18 @@ The root cause of the duplicate HTTP request is unknown. It is not `appWorkspace ### Attempted Fix / Workaround 1. Made `completeAppGithubAuth` clear `oauthState`/`oauthStateExpiresAt` immediately after validation and before `exchangeCode`, so any duplicate request fails the state check instead of hitting GitHub with a consumed code. -2. Marked the fix as a HACK since the root cause of the duplicate request is not identified. +2. Split `syncGithubSessionFromToken` into a fast path (`initGithubSession` — exchange code, get viewer, store token+identity) and a slow path (`syncGithubOrganizations` — list orgs, list installations, sync each workspace). +3. `completeAppGithubAuth` now uses the fast path and enqueues the slow org sync to the workspace workflow queue (`workspace.command.syncGithubSession`, fire-and-forget). The HTTP callback returns a 302 redirect in ~2s instead of ~18s, eliminating the proxy timeout window. +4. The frontend already polls `getAppSnapshot` every 500ms when any org has `syncStatus === "syncing"`, so the deferred sync is transparent to the user. +5. `bootstrapAppGithubSession` (dev-only) still calls the full synchronous `syncGithubSessionFromToken` since proxy timeouts are not a concern in dev and it needs the session fully populated before returning. ### Outcome -- Duplicate callback requests now fail fast with "GitHub OAuth state is invalid or expired" instead of producing a `bad_verification_code` error from GitHub. -- The first request completes normally and the user lands on `/organizations`. -- Root cause of the duplicate HTTP request remains uninvestigated. +- OAuth callback responds in ~2s (exchangeCode + getViewer) instead of ~18s. +- Proxy retry window is eliminated — no duplicate requests should occur. +- Duplicate requests are still guarded by the state-clearing idempotency check. +- Organization data populates asynchronously via the workflow queue; the frontend shows loading state and polls until complete. +- Root cause of the duplicate HTTP request (likely Railway/Cloudflare proxy retry on slow GET) remains uninvestigated but is no longer a practical problem. ## 2026-03-05 - uncommitted