mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 06:04:43 +00:00
refactor: split examples into separate packages and update Claude plan mode
- Restructure examples into individual packages per provider (daytona, docker, e2b, vercel) with shared utilities in @sandbox-agent/example-shared
- Make Claude plan mode prompt-only (no longer requires permissionMode=plan)
- Claude now defaults to bypass permission mode
- Add agent_file_edit_flow test for file editing capabilities
- Fix Daytona file permission setting to use executeCommand
This commit is contained in:
parent
30c4ad6b39
commit
6d6f6d0272
17 changed files with 2773 additions and 165 deletions
|
|
@ -6,7 +6,7 @@ import {
|
|||
logInspectorUrl,
|
||||
runPrompt,
|
||||
waitForHealth,
|
||||
} from "../shared/sandbox-agent-client.ts";
|
||||
} from "@sandbox-agent/example-shared";
|
||||
|
||||
const DEFAULT_PORT = 3000;
|
||||
const BINARY_PATH = resolve(dirname(fileURLToPath(import.meta.url)), "../../target/release/sandbox-agent");
|
||||
|
|
@ -25,9 +25,10 @@ export async function setupDaytonaSandboxAgent(): Promise<{
|
|||
console.log("Creating sandbox...");
|
||||
const sandbox = await daytona.create({ language });
|
||||
|
||||
// Daytona sandboxes can't reach releases.rivet.dev, so upload binary directly
|
||||
console.log("Uploading sandbox-agent...");
|
||||
await sandbox.fs.uploadFile(BINARY_PATH, "/home/daytona/sandbox-agent");
|
||||
await sandbox.fs.setFilePermissions("/home/daytona/sandbox-agent", { mode: "755" });
|
||||
await sandbox.process.executeCommand("chmod +x /home/daytona/sandbox-agent");
|
||||
|
||||
console.log("Starting server...");
|
||||
const tokenFlag = token ? `--token ${token}` : "--no-token";
|
||||
|
|
|
|||
17
examples/daytona/package.json
Normal file
17
examples/daytona/package.json
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "@sandbox-agent/example-daytona",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "tsx daytona.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@daytonaio/sdk": "latest",
|
||||
"@sandbox-agent/example-shared": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "latest",
|
||||
"tsx": "latest",
|
||||
"typescript": "latest"
|
||||
}
|
||||
}
|
||||
|
|
@ -5,7 +5,7 @@ import {
|
|||
logInspectorUrl,
|
||||
runPrompt,
|
||||
waitForHealth,
|
||||
} from "../shared/sandbox-agent-client.ts";
|
||||
} from "@sandbox-agent/example-shared";
|
||||
|
||||
const INSTALL_SCRIPT = "curl -fsSL https://releases.rivet.dev/sandbox-agent/latest/install.sh | sh";
|
||||
const DEFAULT_IMAGE = "debian:bookworm-slim";
|
||||
|
|
|
|||
18
examples/docker/package.json
Normal file
18
examples/docker/package.json
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"name": "@sandbox-agent/example-docker",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "tsx docker.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"dockerode": "latest",
|
||||
"@sandbox-agent/example-shared": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/dockerode": "latest",
|
||||
"@types/node": "latest",
|
||||
"tsx": "latest",
|
||||
"typescript": "latest"
|
||||
}
|
||||
}
|
||||
|
|
@ -5,7 +5,7 @@ import {
|
|||
logInspectorUrl,
|
||||
runPrompt,
|
||||
waitForHealth,
|
||||
} from "../shared/sandbox-agent-client.ts";
|
||||
} from "@sandbox-agent/example-shared";
|
||||
|
||||
const INSTALL_SCRIPT = "curl -fsSL https://releases.rivet.dev/sandbox-agent/latest/install.sh | sh";
|
||||
const DEFAULT_PORT = 2468;
|
||||
|
|
|
|||
17
examples/e2b/package.json
Normal file
17
examples/e2b/package.json
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "@sandbox-agent/example-e2b",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "tsx e2b.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@e2b/code-interpreter": "latest",
|
||||
"@sandbox-agent/example-shared": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "latest",
|
||||
"tsx": "latest",
|
||||
"typescript": "latest"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
{
|
||||
"name": "sandbox-agent-examples",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest",
|
||||
"start:docker": "tsx docker/docker.ts",
|
||||
"start:e2b": "tsx e2b/e2b.ts",
|
||||
"start:daytona": "tsx daytona/daytona.ts",
|
||||
"start:vercel": "tsx vercel/vercel-sandbox.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@daytonaio/sdk": "latest",
|
||||
"@e2b/code-interpreter": "latest",
|
||||
"@vercel/sandbox": "latest",
|
||||
"dockerode": "latest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "latest",
|
||||
"tsx": "latest",
|
||||
"typescript": "latest",
|
||||
"vitest": "latest"
|
||||
}
|
||||
}
|
||||
8
examples/shared/package.json
Normal file
8
examples/shared/package.json
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"name": "@sandbox-agent/example-shared",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": "./sandbox-agent-client.ts"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"types": ["node"],
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"noEmit": true
|
||||
},
|
||||
"include": ["**/*.ts"]
|
||||
}
|
||||
17
examples/vercel/package.json
Normal file
17
examples/vercel/package.json
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "@sandbox-agent/example-vercel",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"start": "tsx vercel-sandbox.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@vercel/sandbox": "latest",
|
||||
"@sandbox-agent/example-shared": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "latest",
|
||||
"tsx": "latest",
|
||||
"typescript": "latest"
|
||||
}
|
||||
}
|
||||
|
|
@ -5,7 +5,7 @@ import {
|
|||
logInspectorUrl,
|
||||
runPrompt,
|
||||
waitForHealth,
|
||||
} from "../shared/sandbox-agent-client.ts";
|
||||
} from "@sandbox-agent/example-shared";
|
||||
|
||||
const INSTALL_SCRIPT = "curl -fsSL https://releases.rivet.dev/sandbox-agent/latest/install.sh | sh";
|
||||
const DEFAULT_PORT = 2468;
|
||||
|
|
|
|||
|
|
@ -1,9 +0,0 @@
|
|||
import { defineConfig } from "vitest/config";
|
||||
|
||||
export default defineConfig({
|
||||
test: {
|
||||
include: ["**/*.test.ts"],
|
||||
testTimeout: 300_000,
|
||||
hookTimeout: 300_000,
|
||||
},
|
||||
});
|
||||
2668
pnpm-lock.yaml
generated
2668
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
|
|
@ -6,3 +6,4 @@ packages:
|
|||
- "resources/agent-schemas"
|
||||
- "resources/vercel-ai-sdk-schemas"
|
||||
- "scripts/release"
|
||||
- "examples/*"
|
||||
|
|
|
|||
|
|
@ -3874,7 +3874,7 @@ fn agent_modes_for(agent: AgentId) -> Vec<AgentModeInfo> {
|
|||
AgentModeInfo {
|
||||
id: "plan".to_string(),
|
||||
name: "Plan".to_string(),
|
||||
description: "Plan mode (requires permissionMode=plan)".to_string(),
|
||||
description: "Plan mode (prompt-only)".to_string(),
|
||||
},
|
||||
],
|
||||
AgentId::Amp => vec![AgentModeInfo {
|
||||
|
|
@ -3947,8 +3947,18 @@ fn normalize_permission_mode(
|
|||
.into())
|
||||
}
|
||||
};
|
||||
if agent == AgentId::Claude {
|
||||
if mode == "plan" {
|
||||
return Err(SandboxError::ModeNotSupported {
|
||||
agent: agent.as_str().to_string(),
|
||||
mode: mode.to_string(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
return Ok("bypass".to_string());
|
||||
}
|
||||
let supported = match agent {
|
||||
AgentId::Claude | AgentId::Codex => matches!(mode, "default" | "plan" | "bypass"),
|
||||
AgentId::Codex => matches!(mode, "default" | "plan" | "bypass"),
|
||||
AgentId::Amp => matches!(mode, "default" | "bypass"),
|
||||
AgentId::Opencode => matches!(mode, "default"),
|
||||
AgentId::Mock => matches!(mode, "default" | "plan" | "bypass"),
|
||||
|
|
@ -3969,18 +3979,6 @@ fn normalize_modes(
|
|||
permission_mode: Option<&str>,
|
||||
) -> Result<(String, String), SandboxError> {
|
||||
let agent_mode = normalize_agent_mode(agent, agent_mode)?;
|
||||
if agent == AgentId::Claude && agent_mode == "plan" {
|
||||
if let Some(permission_mode) = permission_mode {
|
||||
if permission_mode != "plan" {
|
||||
return Err(SandboxError::InvalidRequest {
|
||||
message: "Claude agentMode=plan requires permissionMode=plan".to_string(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
let permission_mode = normalize_permission_mode(agent, Some("plan"))?;
|
||||
return Ok((agent_mode, permission_mode));
|
||||
}
|
||||
let permission_mode = normalize_permission_mode(agent, permission_mode)?;
|
||||
Ok((agent_mode, permission_mode))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,107 @@
|
|||
#[path = "../common/mod.rs"]
|
||||
mod common;
|
||||
|
||||
use common::*;
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use sandbox_agent_agent_management::agents::AgentId;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::time::{Duration, Instant};
|
||||
use axum::http::Method;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_file_edit_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !caps.file_changes {
|
||||
continue;
|
||||
}
|
||||
if config.agent == AgentId::Mock {
|
||||
// Mock agent only emits synthetic file change events.
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let temp_dir = tempfile::tempdir().expect("create temp dir");
|
||||
let file_path = temp_dir.path().join("edit.txt");
|
||||
fs::write(&file_path, "before\n").expect("write seed file");
|
||||
|
||||
let session_id = format!("file-edit-{}", config.agent.as_str());
|
||||
create_session(
|
||||
&app.app,
|
||||
config.agent,
|
||||
&session_id,
|
||||
test_permission_mode(config.agent),
|
||||
)
|
||||
.await;
|
||||
let prompt = format!(
|
||||
"Edit the file at {} so its entire contents are exactly 'updated' (no quotes). \
|
||||
Do not change any other files. Reply only with DONE after editing.",
|
||||
file_path.display()
|
||||
);
|
||||
send_message(&app.app, &session_id, &prompt).await;
|
||||
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
let mut events = Vec::new();
|
||||
let mut replied = false;
|
||||
let mut updated = false;
|
||||
while start.elapsed() < Duration::from_secs(180) {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(&app.app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, axum::http::StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if !replied {
|
||||
if let Some(permission_id) = find_permission_id(&events) {
|
||||
let _ = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!(
|
||||
"/v1/sessions/{session_id}/permissions/{permission_id}/reply"
|
||||
),
|
||||
Some(serde_json::json!({ "reply": "once" })),
|
||||
)
|
||||
.await;
|
||||
replied = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let contents = fs::read_to_string(&file_path).unwrap_or_default();
|
||||
let trimmed = contents.trim_end_matches(&['\r', '\n'][..]);
|
||||
if trimmed == "updated" {
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
|
||||
assert!(
|
||||
updated,
|
||||
"file edit did not complete for {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
mod agent_basic_reply;
|
||||
mod agent_file_edit_flow;
|
||||
mod agent_multi_turn;
|
||||
mod agent_permission_flow;
|
||||
mod agent_question_flow;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue