From 0faa6088223ebdcd0d3b20e3909a5daa8e109cda Mon Sep 17 00:00:00 2001
From: Nathan Flurry <git@nathanflurry.com>
Date: Thu, 29 Jan 2026 07:39:07 -0800
Subject: [PATCH] fix: use sandbox-agent CLI for credential extraction in tests
 and add delay for permission approval

---
 docs/openapi.json                        | 28 ++++++++++
 scripts/sandbox-testing/test-sandbox.ts  | 70 ++++++++++++++++++------
 sdks/typescript/src/generated/openapi.ts | 10 ++++
 3 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index 6b968f1..5fa7897 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1559,6 +1559,34 @@
           }
         }
       },
+      "StderrOutput": {
+        "type": "object",
+        "required": [
+          "truncated"
+        ],
+        "properties": {
+          "head": {
+            "type": "string",
+            "description": "First N lines of stderr (if truncated) or full stderr (if not truncated)",
+            "nullable": true
+          },
+          "tail": {
+            "type": "string",
+            "description": "Last N lines of stderr (only present if truncated)",
+            "nullable": true
+          },
+          "total_lines": {
+            "type": "integer",
+            "description": "Total number of lines in stderr",
+            "nullable": true,
+            "minimum": 0
+          },
+          "truncated": {
+            "type": "boolean",
+            "description": "Whether the output was truncated"
+          }
+        }
+      },
       "TerminatedBy": {
         "type": "string",
         "enum": [
diff --git a/scripts/sandbox-testing/test-sandbox.ts b/scripts/sandbox-testing/test-sandbox.ts
index 3d2d652..2c55499 100644
--- a/scripts/sandbox-testing/test-sandbox.ts
+++ b/scripts/sandbox-testing/test-sandbox.ts
@@ -46,28 +46,58 @@ const log = {
 	section: (msg: string) => console.log(`\n\x1b[1m=== ${msg} ===\x1b[0m`),
 };
 
-// Credentials extraction (mirrors agent-credentials logic)
-function getAnthropicApiKey(): string | undefined {
-	if (process.env.ANTHROPIC_API_KEY) return process.env.ANTHROPIC_API_KEY;
-	const home = homedir();
-	for (const path of [join(home, ".claude.json"), join(home, ".claude.json.api")]) {
-		try {
-			const data = JSON.parse(readFileSync(path, "utf-8"));
-			const key = data.primaryApiKey || data.apiKey || data.anthropicApiKey;
-			if (key?.startsWith("sk-ant-")) return key;
-		} catch {}
+// Resolves API keys for the sandbox tests: env vars win; the sandbox-agent CLI fills in whichever key is missing
+function extractCredentials(): { anthropicApiKey?: string; openaiApiKey?: string } {
+	// Seed from the environment; CLI output below never overrides a key that is already set here
+	const envCreds = {
+		anthropicApiKey: process.env.ANTHROPIC_API_KEY,
+		openaiApiKey: process.env.OPENAI_API_KEY,
+	};
+
+	// Both keys already known: return early so the CLI is not spawned at all
+	if (envCreds.anthropicApiKey && envCreds.openaiApiKey) {
+		return envCreds;
 	}
-	return undefined;
+
+	// Best effort: use a locally built sandbox-agent binary (release first, then debug); failures are only logged
+	try {
+		const binaryPath = join(ROOT_DIR, "target/release/sandbox-agent");
+		const debugBinaryPath = join(ROOT_DIR, "target/debug/sandbox-agent");
+		const binary = existsSync(binaryPath) ? binaryPath : existsSync(debugBinaryPath) ? debugBinaryPath : null;
+
+		if (binary) {
+			// Quote the binary path: execSync runs the command through a shell, so an unquoted
+			// checkout directory containing spaces would be split into several arguments
+			const command = `"${binary}" credentials extract-env --export`;
+			const output = execSync(command, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
+
+			// Only lines shaped like `export KEY="value"` are parsed; anything else in the output is skipped
+			for (const line of output.split("\n")) {
+				const match = line.match(/^export (\w+)="(.*)"/);
+				if (match) {
+					const [, key, value] = match;
+					if (key === "ANTHROPIC_API_KEY" && !envCreds.anthropicApiKey) {
+						envCreds.anthropicApiKey = value;
+					} else if (key === "OPENAI_API_KEY" && !envCreds.openaiApiKey) {
+						envCreds.openaiApiKey = value;
+					}
+				}
+			}
+			log.debug(`Extracted credentials via sandbox-agent CLI`);
+		}
+	} catch (err) {
+		log.debug(`Failed to extract credentials via CLI: ${err}`);
+	}
+
+	return envCreds;
+}
+
+function getAnthropicApiKey(): string | undefined {
+	return extractCredentials().anthropicApiKey;
 }
 
 function getOpenAiApiKey(): string | undefined {
-	if (process.env.OPENAI_API_KEY) return process.env.OPENAI_API_KEY;
-	const home = homedir();
-	try {
-		const data = JSON.parse(readFileSync(join(home, ".codex", "codex.json"), "utf-8"));
-		if (data.apiKey) return data.apiKey;
-	} catch {}
-	return undefined;
+	return extractCredentials().openaiApiKey;
 }
 
 // Build sandbox-agent
@@ -520,6 +550,10 @@ async function testAgentActions(baseUrl: string, agent: string, sandbox: Sandbox
 	const fileMessage = `Create a file at ${testFile} with exactly this content (no quotes, no extra text): ${expectedContent}`;
 	await sendMessage(baseUrl, sessionId, fileMessage);
 
+	// Wait for agent to complete action after permission approval
+	log.info("Waiting for agent to complete action...");
+	await new Promise((r) => setTimeout(r, 5000));
+
 	// Verify file was created
 	log.info("Verifying file was created...");
 	const fileCheck = await sandbox.exec(`cat ${testFile} 2>&1`);
diff --git a/sdks/typescript/src/generated/openapi.ts b/sdks/typescript/src/generated/openapi.ts
index 0fcb8e8..52816ad 100644
--- a/sdks/typescript/src/generated/openapi.ts
+++ b/sdks/typescript/src/generated/openapi.ts
@@ -286,6 +286,16 @@ export interface components {
     SessionStartedData: {
       metadata?: unknown;
     };
+    StderrOutput: {
+      /** @description First N lines of stderr (if truncated) or full stderr (if not truncated) */
+      head?: string | null;
+      /** @description Last N lines of stderr (only present if truncated) */
+      tail?: string | null;
+      /** @description Total number of lines in stderr */
+      total_lines?: number | null;
+      /** @description Whether the output was truncated */
+      truncated: boolean;
+    };
     /** @enum {string} */
     TerminatedBy: "agent" | "daemon";
     TurnStreamQuery: {