mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-20 01:00:32 +00:00
Defer slow GitHub org sync to workflow queue for fast OAuth callback
Split syncGithubSessionFromToken into a fast path (initGithubSession: exchange code, get viewer, store token+identity) and a slow path (syncGithubOrganizations: list orgs/installations, sync workspaces). completeAppGithubAuth now returns the 302 redirect in ~2s instead of ~18s by enqueuing the org sync to the workspace workflow queue (fire-and-forget). This eliminates the proxy timeout window that was causing duplicate callback requests. bootstrapAppGithubSession (dev-only) still calls the full synchronous sync since proxy timeouts are not a concern and it needs the session fully populated before returning. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a36431903e
commit
fa4ed388d2
3 changed files with 108 additions and 34 deletions
|
|
@ -62,7 +62,12 @@ interface RepoOverviewInput {
|
||||||
repoId: string;
|
repoId: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const WORKSPACE_QUEUE_NAMES = ["workspace.command.addRepo", "workspace.command.createTask", "workspace.command.refreshProviderProfiles"] as const;
|
const WORKSPACE_QUEUE_NAMES = [
|
||||||
|
"workspace.command.addRepo",
|
||||||
|
"workspace.command.createTask",
|
||||||
|
"workspace.command.refreshProviderProfiles",
|
||||||
|
"workspace.command.syncGithubSession",
|
||||||
|
] as const;
|
||||||
const SANDBOX_AGENT_REPO = "rivet-dev/sandbox-agent";
|
const SANDBOX_AGENT_REPO = "rivet-dev/sandbox-agent";
|
||||||
|
|
||||||
type WorkspaceQueueName = (typeof WORKSPACE_QUEUE_NAMES)[number];
|
type WorkspaceQueueName = (typeof WORKSPACE_QUEUE_NAMES)[number];
|
||||||
|
|
@ -366,6 +371,20 @@ export async function runWorkspaceWorkflow(ctx: any): Promise<void> {
|
||||||
refreshProviderProfilesMutation(loopCtx, msg.body as RefreshProviderProfilesCommand),
|
refreshProviderProfilesMutation(loopCtx, msg.body as RefreshProviderProfilesCommand),
|
||||||
);
|
);
|
||||||
await msg.complete({ ok: true });
|
await msg.complete({ ok: true });
|
||||||
|
return Loop.continue(undefined);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (msg.name === "workspace.command.syncGithubSession") {
|
||||||
|
await loopCtx.step({
|
||||||
|
name: "workspace-sync-github-session",
|
||||||
|
timeout: 60_000,
|
||||||
|
run: async () => {
|
||||||
|
const { syncGithubOrganizations } = await import("./app-shell.js");
|
||||||
|
await syncGithubOrganizations(loopCtx, msg.body as { sessionId: string; accessToken: string });
|
||||||
|
},
|
||||||
|
});
|
||||||
|
await msg.complete({ ok: true });
|
||||||
|
return Loop.continue(undefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Loop.continue(undefined);
|
return Loop.continue(undefined);
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import type {
|
||||||
UpdateFoundryOrganizationProfileInput,
|
UpdateFoundryOrganizationProfileInput,
|
||||||
} from "@sandbox-agent/foundry-shared";
|
} from "@sandbox-agent/foundry-shared";
|
||||||
import { getActorRuntimeContext } from "../context.js";
|
import { getActorRuntimeContext } from "../context.js";
|
||||||
import { getOrCreateWorkspace } from "../handles.js";
|
import { getOrCreateWorkspace, selfWorkspace } from "../handles.js";
|
||||||
import { GitHubAppError } from "../../services/app-github.js";
|
import { GitHubAppError } from "../../services/app-github.js";
|
||||||
import { repoIdFromRemote, repoLabelFromRemote } from "../../services/repo.js";
|
import { repoIdFromRemote, repoLabelFromRemote } from "../../services/repo.js";
|
||||||
import { logger } from "../../logging.js";
|
import { logger } from "../../logging.js";
|
||||||
|
|
@ -369,11 +369,50 @@ async function safeListInstallations(accessToken: string): Promise<any[]> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function syncGithubSessionFromToken(c: any, sessionId: string, accessToken: string): Promise<{ sessionId: string; redirectTo: string }> {
|
/**
|
||||||
|
* Fast path: resolve viewer identity, store user + token in the session,
|
||||||
|
* and return the redirect URL. Does NOT sync organizations — that work is
|
||||||
|
* deferred to `syncGithubOrganizations` via the workflow queue so the HTTP
|
||||||
|
* callback can respond before any proxy timeout triggers a retry.
|
||||||
|
*/
|
||||||
|
async function initGithubSession(c: any, sessionId: string, accessToken: string, scopes: string[]): Promise<{ sessionId: string; redirectTo: string }> {
|
||||||
assertAppWorkspace(c);
|
assertAppWorkspace(c);
|
||||||
const { appShell } = getActorRuntimeContext();
|
const { appShell } = getActorRuntimeContext();
|
||||||
|
const viewer = await appShell.github.getViewer(accessToken);
|
||||||
|
const userId = `user-${slugify(viewer.login)}`;
|
||||||
|
|
||||||
|
await updateAppSession(c, sessionId, {
|
||||||
|
currentUserId: userId,
|
||||||
|
currentUserName: viewer.name || viewer.login,
|
||||||
|
currentUserEmail: viewer.email ?? `${viewer.login}@users.noreply.github.com`,
|
||||||
|
currentUserGithubLogin: viewer.login,
|
||||||
|
currentUserRoleLabel: "GitHub user",
|
||||||
|
githubAccessToken: accessToken,
|
||||||
|
githubScope: scopes.join(","),
|
||||||
|
oauthState: null,
|
||||||
|
oauthStateExpiresAt: null,
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
sessionId,
|
||||||
|
redirectTo: `${appShell.appUrl}/organizations?foundrySession=${encodeURIComponent(sessionId)}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Slow path: list GitHub orgs + installations, sync each org workspace,
|
||||||
|
* and update the session's eligible organization list. Called from the
|
||||||
|
* workflow queue so it runs in the background after the callback has
|
||||||
|
* already returned a redirect to the browser.
|
||||||
|
*
|
||||||
|
* Also used synchronously by bootstrapAppGithubSession (dev-only) where
|
||||||
|
* proxy timeouts are not a concern.
|
||||||
|
*/
|
||||||
|
export async function syncGithubOrganizations(c: any, input: { sessionId: string; accessToken: string }): Promise<void> {
|
||||||
|
assertAppWorkspace(c);
|
||||||
|
const { appShell } = getActorRuntimeContext();
|
||||||
|
const { sessionId, accessToken } = input;
|
||||||
const session = await requireAppSessionRow(c, sessionId);
|
const session = await requireAppSessionRow(c, sessionId);
|
||||||
const token = { accessToken, scopes: splitScopes(session.githubScope) };
|
|
||||||
const viewer = await appShell.github.getViewer(accessToken);
|
const viewer = await appShell.github.getViewer(accessToken);
|
||||||
const organizations = await safeListOrganizations(accessToken);
|
const organizations = await safeListOrganizations(accessToken);
|
||||||
const installations = await safeListInstallations(accessToken);
|
const installations = await safeListInstallations(accessToken);
|
||||||
|
|
@ -424,24 +463,23 @@ async function syncGithubSessionFromToken(c: any, sessionId: string, accessToken
|
||||||
? (linkedOrganizationIds[0] ?? null)
|
? (linkedOrganizationIds[0] ?? null)
|
||||||
: null;
|
: null;
|
||||||
|
|
||||||
await updateAppSession(c, session.id, {
|
await updateAppSession(c, sessionId, {
|
||||||
currentUserId: userId,
|
|
||||||
currentUserName: viewer.name || viewer.login,
|
|
||||||
currentUserEmail: viewer.email ?? `${viewer.login}@users.noreply.github.com`,
|
|
||||||
currentUserGithubLogin: viewer.login,
|
|
||||||
currentUserRoleLabel: "GitHub user",
|
|
||||||
eligibleOrganizationIdsJson: encodeEligibleOrganizationIds(linkedOrganizationIds),
|
eligibleOrganizationIdsJson: encodeEligibleOrganizationIds(linkedOrganizationIds),
|
||||||
activeOrganizationId,
|
activeOrganizationId,
|
||||||
githubAccessToken: accessToken,
|
|
||||||
githubScope: token.scopes.join(","),
|
|
||||||
oauthState: null,
|
|
||||||
oauthStateExpiresAt: null,
|
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
/**
|
||||||
sessionId: session.id,
|
* Full synchronous sync: init session + sync orgs in one call.
|
||||||
redirectTo: `${appShell.appUrl}/organizations?foundrySession=${encodeURIComponent(session.id)}`,
|
* Used by bootstrapAppGithubSession (dev-only) where there is no proxy
|
||||||
};
|
* timeout concern and we want the session fully populated before returning.
|
||||||
|
*/
|
||||||
|
async function syncGithubSessionFromToken(c: any, sessionId: string, accessToken: string): Promise<{ sessionId: string; redirectTo: string }> {
|
||||||
|
const session = await requireAppSessionRow(c, sessionId);
|
||||||
|
const scopes = splitScopes(session.githubScope);
|
||||||
|
const result = await initGithubSession(c, sessionId, accessToken, scopes);
|
||||||
|
await syncGithubOrganizations(c, { sessionId, accessToken });
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function readOrganizationProfileRow(c: any) {
|
async function readOrganizationProfileRow(c: any) {
|
||||||
|
|
@ -626,14 +664,9 @@ export const workspaceAppActions = {
|
||||||
throw new Error("GitHub OAuth state is invalid or expired");
|
throw new Error("GitHub OAuth state is invalid or expired");
|
||||||
}
|
}
|
||||||
|
|
||||||
// HACK: Clear the oauth state before exchangeCode so that duplicate
|
// Clear state before exchangeCode — GitHub codes are single-use and
|
||||||
// callback requests (e.g. user refresh during the slow sync) fail the
|
// duplicate callback requests (from proxy retries or user refresh)
|
||||||
// state check above instead of attempting a second code exchange.
|
// must fail the state check rather than attempt a second exchange.
|
||||||
// GitHub OAuth codes are single-use; a second exchangeCode always
|
|
||||||
// returns bad_verification_code. The root cause of duplicate requests
|
|
||||||
// is unknown — actor RPCs serialize, appWorkspaceAction does not retry,
|
|
||||||
// yet production logs show two HTTP requests with different request IDs
|
|
||||||
// hitting /v1/auth/github/callback with the same code+state ~9s apart.
|
|
||||||
// See research/friction/general.mdx 2026-03-13 entry.
|
// See research/friction/general.mdx 2026-03-13 entry.
|
||||||
await updateAppSession(c, session.id, {
|
await updateAppSession(c, session.id, {
|
||||||
oauthState: null,
|
oauthState: null,
|
||||||
|
|
@ -641,10 +674,27 @@ export const workspaceAppActions = {
|
||||||
});
|
});
|
||||||
|
|
||||||
const token = await appShell.github.exchangeCode(input.code);
|
const token = await appShell.github.exchangeCode(input.code);
|
||||||
await updateAppSession(c, session.id, {
|
|
||||||
githubScope: token.scopes.join(","),
|
// Fast path: store token + user identity and return the redirect
|
||||||
});
|
// immediately. The slow org sync (list orgs, list installations,
|
||||||
return await syncGithubSessionFromToken(c, session.id, token.accessToken);
|
// sync each workspace) runs in the workflow queue so the HTTP
|
||||||
|
// response lands before any proxy/infra timeout triggers a retry.
|
||||||
|
// The frontend already polls when it sees syncStatus === "syncing".
|
||||||
|
const result = await initGithubSession(c, session.id, token.accessToken, token.scopes);
|
||||||
|
|
||||||
|
// Enqueue the slow org sync to the workflow. fire-and-forget (wait: false)
|
||||||
|
// because the redirect does not depend on org data — the frontend will
|
||||||
|
// poll getAppSnapshot until organizations are populated.
|
||||||
|
const self = selfWorkspace(c);
|
||||||
|
await self.send(
|
||||||
|
"workspace.command.syncGithubSession",
|
||||||
|
{ sessionId: session.id, accessToken: token.accessToken },
|
||||||
|
{
|
||||||
|
wait: false,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
return result;
|
||||||
},
|
},
|
||||||
|
|
||||||
async bootstrapAppGithubSession(c: any, input: { accessToken: string; sessionId?: string | null }): Promise<{ sessionId: string; redirectTo: string }> {
|
async bootstrapAppGithubSession(c: any, input: { accessToken: string; sessionId?: string | null }): Promise<{ sessionId: string; redirectTo: string }> {
|
||||||
|
|
|
||||||
|
|
@ -15,13 +15,18 @@ The root cause of the duplicate HTTP request is unknown. It is not `appWorkspace
|
||||||
### Attempted Fix / Workaround
|
### Attempted Fix / Workaround
|
||||||
|
|
||||||
1. Made `completeAppGithubAuth` clear `oauthState`/`oauthStateExpiresAt` immediately after validation and before `exchangeCode`, so any duplicate request fails the state check instead of hitting GitHub with a consumed code.
|
1. Made `completeAppGithubAuth` clear `oauthState`/`oauthStateExpiresAt` immediately after validation and before `exchangeCode`, so any duplicate request fails the state check instead of hitting GitHub with a consumed code.
|
||||||
2. Marked the fix as a HACK since the root cause of the duplicate request is not identified.
|
2. Split `syncGithubSessionFromToken` into a fast path (`initGithubSession` — exchange code, get viewer, store token+identity) and a slow path (`syncGithubOrganizations` — list orgs, list installations, sync each workspace).
|
||||||
|
3. `completeAppGithubAuth` now uses the fast path and enqueues the slow org sync to the workspace workflow queue (`workspace.command.syncGithubSession`, fire-and-forget). The HTTP callback returns a 302 redirect in ~2s instead of ~18s, eliminating the proxy timeout window.
|
||||||
|
4. The frontend already polls `getAppSnapshot` every 500ms when any org has `syncStatus === "syncing"`, so the deferred sync is transparent to the user.
|
||||||
|
5. `bootstrapAppGithubSession` (dev-only) still calls the full synchronous `syncGithubSessionFromToken` since proxy timeouts are not a concern in dev and it needs the session fully populated before returning.
|
||||||
|
|
||||||
### Outcome
|
### Outcome
|
||||||
|
|
||||||
- Duplicate callback requests now fail fast with "GitHub OAuth state is invalid or expired" instead of producing a `bad_verification_code` error from GitHub.
|
- OAuth callback responds in ~2s (exchangeCode + getViewer) instead of ~18s.
|
||||||
- The first request completes normally and the user lands on `/organizations`.
|
- Proxy retry window is eliminated — no duplicate requests should occur.
|
||||||
- Root cause of the duplicate HTTP request remains uninvestigated.
|
- Duplicate requests are still guarded by the state-clearing idempotency check.
|
||||||
|
- Organization data populates asynchronously via the workflow queue; the frontend shows loading state and polls until complete.
|
||||||
|
- Root cause of the duplicate HTTP request (likely Railway/Cloudflare proxy retry on slow GET) remains uninvestigated but is no longer a practical problem.
|
||||||
|
|
||||||
## 2026-03-05 - uncommitted
|
## 2026-03-05 - uncommitted
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue