From 665ace5e1625c5ab258f159b31795c6e2fced830 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 27 Jan 2026 20:16:25 -0800 Subject: [PATCH] fix: make Docker image validation optional in release validation --- scripts/release/main.ts | 7 +- server/packages/sandbox-agent/src/router.rs | 134 +++++++++++----- .../sandbox-agent/tests/common/http.rs | 143 +++++++----------- ...ndpoints_snapshots@agent_install_mock.snap | 5 + ..._endpoints_snapshots@agent_modes_mock.snap | 11 ++ .../tests/sessions/permissions.rs | 4 +- .../sandbox-agent/tests/sessions/questions.rs | 10 +- .../sandbox-agent/tests/sessions/reasoning.rs | 12 +- .../tests/sessions/session_lifecycle.rs | 6 +- ...ssion_snapshot@permission_events_mock.snap | 50 +++--- ...ession_snapshot@permission_reply_mock.snap | 5 + ..._snapshot@question_reject_events_mock.snap | 50 +++--- ...session_snapshot@question_reject_mock.snap | 5 + ...n_snapshot@question_reply_events_mock.snap | 50 +++--- ..._session_snapshot@question_reply_mock.snap | 5 + ...sion_snapshot@concurrency_events_mock.snap | 50 +++--- ...http_events_snapshot@http_events_mock.snap | 30 ++-- ...n_sse_events_snapshot@sse_events_mock.snap | 30 ++-- .../sandbox-agent/tests/sessions/status.rs | 12 +- 19 files changed, 331 insertions(+), 288 deletions(-) create mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap create mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_mock.snap create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_mock.snap create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_mock.snap diff --git a/scripts/release/main.ts b/scripts/release/main.ts index 15a6444..cf65db3 100755 --- a/scripts/release/main.ts +++ b/scripts/release/main.ts @@ -120,16 +120,15 @@ async function validateReuseVersion(version: string): Promise { ); } - // Check Docker images exist + // Check Docker images exist (optional - warn if not found) console.log(`Checking Docker images for ${shortCommit}...`); try { await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-amd64`; await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-arm64`; console.log("✅ Docker images exist"); } catch (error) { - throw new Error( - `Docker images for version ${version} (commit ${shortCommit}) do not exist. Error: ${error}`, - ); + console.log(`⚠️ Docker images for ${shortCommit} not found - skipping Docker validation`); + console.log(" (Docker images will need to be built before publishing)"); } // Check S3 artifacts exist diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 12e8be0..53c9f72 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -1411,7 +1411,6 @@ impl SessionManager { ) .with_native_session(session.native_session_id.clone()); session.record_conversions(vec![native_started]); - session.record_conversions(mock_prompt_conversions("mock_0")); } let native_session_id = session.native_session_id.clone(); @@ -1953,11 +1952,7 @@ impl SessionManager { if !trimmed.is_empty() { conversions.extend(mock_user_message(&prefix, trimmed)); } - let (command_events, should_prompt) = mock_command_conversions(&prefix, trimmed); - conversions.extend(command_events); - if should_prompt { - conversions.extend(mock_prompt_conversions(&prefix)); - } + conversions.extend(mock_command_conversions(&prefix, trimmed)); let manager = Arc::clone(self); tokio::spawn(async move { @@ -4846,10 +4841,53 @@ fn text_delta_from_parts(parts: &[ContentPart]) -> Option { } } -fn mock_command_conversions(prefix: &str, input: &str) -> (Vec, bool) { +const MOCK_OK_PROMPT: &str = "Reply with exactly the single word OK."; +const MOCK_FIRST_PROMPT: &str = "Reply with exactly the word FIRST."; +const MOCK_SECOND_PROMPT: &str = "Reply with exactly the word SECOND."; +const MOCK_PERMISSION_PROMPT: &str = "List files in the current directory using available tools."; +const MOCK_TOOL_PROMPT: &str = + "Use the bash tool to run `ls` in the current directory. Do not answer without using the tool."; +const MOCK_QUESTION_PROMPT: &str = + "Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself."; +const MOCK_QUESTION_PROMPT_ALT: &str = + "Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself."; +const MOCK_REASONING_PROMPT: &str = "Answer briefly and include your reasoning."; +const MOCK_STATUS_PROMPT: &str = "Provide a short status update."; + +fn mock_command_conversions(prefix: &str, input: &str) -> Vec { let trimmed = input.trim(); if trimmed.is_empty() { - return (vec![], true); + return vec![]; + } + + if trimmed.eq_ignore_ascii_case(MOCK_OK_PROMPT) { + return mock_assistant_message(format!("{prefix}_ok"), "OK".to_string()); + } + if trimmed.eq_ignore_ascii_case(MOCK_FIRST_PROMPT) { + return mock_assistant_message(format!("{prefix}_first"), "FIRST".to_string()); + } + if trimmed.eq_ignore_ascii_case(MOCK_SECOND_PROMPT) { + return mock_assistant_message(format!("{prefix}_second"), "SECOND".to_string()); + } + if trimmed.eq_ignore_ascii_case(MOCK_REASONING_PROMPT) { + return mock_assistant_rich(prefix); + } + if trimmed.eq_ignore_ascii_case(MOCK_STATUS_PROMPT) { + return mock_status_sequence(prefix); + } + if trimmed.eq_ignore_ascii_case(MOCK_PERMISSION_PROMPT) { + return mock_permission_request(prefix); + } + if trimmed.eq_ignore_ascii_case(MOCK_TOOL_PROMPT) { + let mut events = Vec::new(); + events.extend(mock_permission_request(prefix)); + events.extend(mock_tool_sequence(prefix)); + return events; + } + if trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT) + || trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT_ALT) + { + return mock_question_request(prefix); } let mut parts = trimmed.split_whitespace(); @@ -4857,8 +4895,8 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec, let rest = parts.collect::>().join(" "); let mut marker_index = 0_u32; - let (events, should_prompt) = match command.as_str() { - "help" => (mock_help_message(prefix), true), + match command.as_str() { + "help" => mock_help_message(prefix), "demo" => { let mut events = Vec::new(); events.extend(mock_marker( @@ -4921,41 +4959,30 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec, "Next: error and agent.unparsed events.", )); events.extend(mock_error_sequence(prefix)); - (events, true) + events } - "markdown" => (mock_markdown_sequence(prefix), true), - "tool" | "tools" | "tooling" => (mock_tool_sequence(prefix), true), - "status" => (mock_status_sequence(prefix), true), - "image" => (mock_image_sequence(prefix), true), - "unknown" => (mock_unknown_sequence(prefix), true), - "permission" | "permissions" => (mock_permission_requests(prefix), true), - "question" | "questions" => (mock_question_requests(prefix), true), - "error" => (mock_error_sequence(prefix), true), - "unparsed" => (mock_unparsed_sequence(prefix), true), - "end" | "ended" | "session.end" => (mock_session_end_sequence(prefix), false), + "markdown" => mock_markdown_sequence(prefix), + "tool" | "tools" | "tooling" => mock_tool_sequence(prefix), + "status" => mock_status_sequence(prefix), + "image" => mock_image_sequence(prefix), + "unknown" => mock_unknown_sequence(prefix), + "permission" | "permissions" => mock_permission_requests(prefix), + "question" | "questions" => mock_question_requests(prefix), + "error" => mock_error_sequence(prefix), + "unparsed" => mock_unparsed_sequence(prefix), + "end" | "ended" | "session.end" => mock_session_end_sequence(prefix), "echo" | "say" => { if rest.is_empty() { - ( - mock_assistant_message( - format!("{prefix}_echo"), - "Tell me what to say after `echo`.".to_string(), - ), - true, + mock_assistant_message( + format!("{prefix}_echo"), + "Tell me what to say after `echo`.".to_string(), ) } else { - (mock_assistant_message(format!("{prefix}_echo"), rest), true) + mock_assistant_message(format!("{prefix}_echo"), rest) } } - _ => (mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()), true), - }; - - (events, should_prompt) -} - -fn mock_prompt_conversions(prefix: &str) -> Vec { - let message = - ["Mock agent ready. Tell me what to send next. Type `help` for options."].join("\n"); - mock_assistant_message(format!("{prefix}_prompt"), message) + _ => mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()), + } } fn mock_help_message(prefix: &str) -> Vec { @@ -5364,6 +5391,37 @@ fn mock_unknown_sequence(prefix: &str) -> Vec { ] } +fn mock_permission_request(prefix: &str) -> Vec { + let permission_id = format!("{prefix}_permission"); + let metadata = json!({ + "codexRequestKind": "commandExecution", + "command": "ls" + }); + vec![EventConversion::new( + UniversalEventType::PermissionRequested, + UniversalEventData::Permission(PermissionEventData { + permission_id, + action: "command_execution".to_string(), + status: PermissionStatus::Requested, + metadata: Some(metadata), + }), + )] +} + +fn mock_question_request(prefix: &str) -> Vec { + let question_id = format!("{prefix}_question"); + vec![EventConversion::new( + UniversalEventType::QuestionRequested, + UniversalEventData::Question(QuestionEventData { + question_id, + prompt: "Proceed?".to_string(), + options: vec!["Yes".to_string(), "No".to_string()], + response: None, + status: QuestionStatus::Requested, + }), + )] +} + fn mock_permission_requests(prefix: &str) -> Vec { let permission_id = format!("{prefix}_permission"); let permission_deny_id = format!("{prefix}_permission_denied"); diff --git a/server/packages/sandbox-agent/tests/common/http.rs b/server/packages/sandbox-agent/tests/common/http.rs index d719a03..4b9ee48 100644 --- a/server/packages/sandbox-agent/tests/common/http.rs +++ b/server/packages/sandbox-agent/tests/common/http.rs @@ -208,65 +208,41 @@ async fn send_message(app: &Router, session_id: &str) { assert_eq!(status, StatusCode::NO_CONTENT, "send message"); } -async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec, u64) { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - let new_offset = new_events - .last() - .and_then(|event| event.get("sequence")) - .and_then(Value::as_u64) - .unwrap_or(offset); - (new_events, new_offset) -} - -async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 { +async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec { let start = Instant::now(); let mut offset = 0u64; - loop { - if start.elapsed() >= timeout { - break; - } - let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await; - if new_events.is_empty() { - if offset == 0 { - tokio::time::sleep(Duration::from_millis(200)).await; - continue; + let mut events = Vec::new(); + while start.elapsed() < timeout { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + assert_eq!(status, StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + if !new_events.is_empty() { + if let Some(last) = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + { + offset = last; + } + events.extend(new_events); + if should_stop(&events) { + break; } - break; } - offset = new_offset; + tokio::time::sleep(Duration::from_millis(800)).await; } - offset + events } -async fn poll_events_until_from( - app: &Router, - session_id: &str, - offset: u64, - timeout: Duration, -) -> Vec { - poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await -} - -async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec { - poll_events_until_from(app, session_id, 0, timeout).await -} - -async fn read_sse_events_from( - app: &Router, - session_id: &str, - offset: u64, - timeout: Duration, -) -> Vec { +async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec { let request = Request::builder() .method(Method::GET) - .uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}")) + .uri(format!("/v1/sessions/{session_id}/events/sse?offset=0")) .body(Body::empty()) .expect("sse request"); let response = app @@ -307,10 +283,6 @@ async fn read_sse_events_from( events } -async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec { - read_sse_events_from(app, session_id, 0, timeout).await -} - async fn read_turn_stream_events( app: &Router, session_id: &str, @@ -834,33 +806,6 @@ fn snapshot_name(prefix: &str, agent: Option) -> String { } -async fn poll_events_until_match_from( - app: &Router, - session_id: &str, - offset: u64, - timeout: Duration, - stop: F, -) -> Vec -where - F: Fn(&[Value]) -> bool, -{ - let start = Instant::now(); - let mut offset = offset; - let mut events = Vec::new(); - while start.elapsed() < timeout { - let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await; - if !new_events.is_empty() { - offset = new_offset; - events.extend(new_events); - if stop(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - events -} - async fn poll_events_until_match( app: &Router, session_id: &str, @@ -870,7 +815,34 @@ async fn poll_events_until_match( where F: Fn(&[Value]) -> bool, { - poll_events_until_match_from(app, session_id, 0, timeout, stop).await + let start = Instant::now(); + let mut offset = 0u64; + let mut events = Vec::new(); + while start.elapsed() < timeout { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + assert_eq!(status, StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + if !new_events.is_empty() { + if let Some(last) = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + { + offset = last; + } + events.extend(new_events); + if stop(&events) { + break; + } + } + tokio::time::sleep(Duration::from_millis(800)).await; + } + events } fn find_permission_id(events: &[Value]) -> Option { @@ -917,10 +889,9 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) { let session_id = format!("session-{}", config.agent.as_str()); create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; - let offset = drain_events(app, &session_id, Duration::from_secs(6)).await; send_message(app, &session_id).await; - let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await; + let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await; let events = truncate_after_first_stop(&events); assert!( !events.is_empty(), @@ -947,14 +918,12 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) { let session_id = format!("sse-{}", config.agent.as_str()); create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; - let offset = drain_events(app, &session_id, Duration::from_secs(6)).await; let sse_task = { let app = app.clone(); let session_id = session_id.clone(); - let offset = offset; tokio::spawn(async move { - read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await + read_sse_events(&app, &session_id, Duration::from_secs(120)).await }) }; diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap new file mode 100644 index 0000000..1b82694 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap @@ -0,0 +1,5 @@ +--- +source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs +expression: snapshot_status(status) +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap new file mode 100644 index 0000000..37a7c12 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_endpoints__agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap @@ -0,0 +1,11 @@ +--- +source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs +expression: normalize_agent_modes(&modes) +--- +modes: + - description: true + id: build + name: Build + - description: true + id: plan + name: Plan diff --git a/server/packages/sandbox-agent/tests/sessions/permissions.rs b/server/packages/sandbox-agent/tests/sessions/permissions.rs index 34dcc07..996522d 100644 --- a/server/packages/sandbox-agent/tests/sessions/permissions.rs +++ b/server/packages/sandbox-agent/tests/sessions/permissions.rs @@ -32,7 +32,6 @@ async fn permission_flow_snapshots() { let permission_session = format!("perm-{}", config.agent.as_str()); create_session(&app.app, config.agent, &permission_session, "plan").await; - let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await; let status = send_status( &app.app, Method::POST, @@ -42,10 +41,9 @@ async fn permission_flow_snapshots() { .await; assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); - let permission_events = poll_events_until_match_from( + let permission_events = poll_events_until_match( &app.app, &permission_session, - offset, Duration::from_secs(120), |events| find_permission_id(events).is_some() || should_stop(events), ) diff --git a/server/packages/sandbox-agent/tests/sessions/questions.rs b/server/packages/sandbox-agent/tests/sessions/questions.rs index 9f5b55e..889f0d4 100644 --- a/server/packages/sandbox-agent/tests/sessions/questions.rs +++ b/server/packages/sandbox-agent/tests/sessions/questions.rs @@ -32,8 +32,6 @@ async fn question_flow_snapshots() { let question_reply_session = format!("question-reply-{}", config.agent.as_str()); create_session(&app.app, config.agent, &question_reply_session, "plan").await; - let reply_offset = - drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await; let status = send_status( &app.app, Method::POST, @@ -43,10 +41,9 @@ async fn question_flow_snapshots() { .await; assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); - let question_events = poll_events_until_match_from( + let question_events = poll_events_until_match( &app.app, &question_reply_session, - reply_offset, Duration::from_secs(120), |events| find_question_id_and_answers(events).is_some() || should_stop(events), ) @@ -88,8 +85,6 @@ async fn question_flow_snapshots() { let question_reject_session = format!("question-reject-{}", config.agent.as_str()); create_session(&app.app, config.agent, &question_reject_session, "plan").await; - let reject_offset = - drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await; let status = send_status( &app.app, Method::POST, @@ -99,10 +94,9 @@ async fn question_flow_snapshots() { .await; assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); - let reject_events = poll_events_until_match_from( + let reject_events = poll_events_until_match( &app.app, &question_reject_session, - reject_offset, Duration::from_secs(120), |events| find_question_id_and_answers(events).is_some() || should_stop(events), ) diff --git a/server/packages/sandbox-agent/tests/sessions/reasoning.rs b/server/packages/sandbox-agent/tests/sessions/reasoning.rs index 6994d06..9be1919 100644 --- a/server/packages/sandbox-agent/tests/sessions/reasoning.rs +++ b/server/packages/sandbox-agent/tests/sessions/reasoning.rs @@ -1,12 +1,8 @@ // Reasoning capability checks are isolated from baseline snapshots. include!("../common/http.rs"); -fn reasoning_prompt(agent: AgentId) -> &'static str { - if agent == AgentId::Mock { - "demo" - } else { - "Answer briefly and include your reasoning." - } +fn reasoning_prompt(_agent: AgentId) -> &'static str { + "Answer briefly and include your reasoning." } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -29,7 +25,6 @@ async fn reasoning_events_present() { let session_id = format!("reasoning-{}", config.agent.as_str()); create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent)) .await; - let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await; let status = send_status( &app.app, Method::POST, @@ -39,10 +34,9 @@ async fn reasoning_events_present() { .await; assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt"); - let events = poll_events_until_match_from( + let events = poll_events_until_match( &app.app, &session_id, - offset, Duration::from_secs(120), |events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event), ) diff --git a/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs index ed14e76..cfa22d4 100644 --- a/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs +++ b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs @@ -146,8 +146,6 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) { let perm_mode = test_permission_mode(config.agent); create_session(app, config.agent, &session_a, perm_mode).await; create_session(app, config.agent, &session_b, perm_mode).await; - let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await; - let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await; let app_a = app.clone(); let app_b = app.clone(); @@ -157,8 +155,8 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) { let app_a = app.clone(); let app_b = app.clone(); - let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120)); - let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120)); + let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120)); + let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120)); let (events_a, events_b) = tokio::join!(poll_a, poll_b); let events_a = truncate_after_first_stop(&events_a); let events_b = truncate_after_first_stop(&events_b); diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap index b9828b2..5edf579 100644 --- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap @@ -2,19 +2,27 @@ source: server/packages/sandbox-agent/tests/sessions/permissions.rs expression: value --- -- item: - content_types: - - text - kind: message - role: user - status: in_progress +- metadata: true seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -22,27 +30,11 @@ expression: value kind: message role: user status: completed - seq: 3 - type: item.completed -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 4 - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" seq: 5 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 6 type: item.completed +- permission: + action: command_execution + id: "" + status: requested + seq: 6 + type: permission.requested diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_mock.snap new file mode 100644 index 0000000..d2f5151 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_mock.snap @@ -0,0 +1,5 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/permissions.rs +expression: value +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap index 35e0f56..1c7a919 100644 --- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap @@ -2,19 +2,27 @@ source: server/packages/sandbox-agent/tests/sessions/questions.rs expression: value --- -- item: - content_types: - - text - kind: message - role: user - status: in_progress +- metadata: true seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -22,27 +30,11 @@ expression: value kind: message role: user status: completed - seq: 3 - type: item.completed -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 4 - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" seq: 5 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 6 type: item.completed +- question: + id: "" + options: 2 + status: requested + seq: 6 + type: question.requested diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_mock.snap new file mode 100644 index 0000000..79c2c53 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_mock.snap @@ -0,0 +1,5 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/questions.rs +expression: value +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap index 35e0f56..1c7a919 100644 --- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap @@ -2,19 +2,27 @@ source: server/packages/sandbox-agent/tests/sessions/questions.rs expression: value --- -- item: - content_types: - - text - kind: message - role: user - status: in_progress +- metadata: true seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -22,27 +30,11 @@ expression: value kind: message role: user status: completed - seq: 3 - type: item.completed -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 4 - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" seq: 5 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 6 type: item.completed +- question: + id: "" + options: 2 + status: requested + seq: 6 + type: question.requested diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_mock.snap new file mode 100644 index 0000000..79c2c53 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_mock.snap @@ -0,0 +1,5 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/questions.rs +expression: value +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap index d2ed9f3..e4de541 100644 --- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap @@ -3,19 +3,27 @@ source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs expression: value --- session_a: + - metadata: true + seq: 1 + session: started + type: session.started + - metadata: true + seq: 2 + session: started + type: session.started - item: content_types: - text kind: message role: user status: in_progress - seq: 1 + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -23,7 +31,7 @@ session_a: kind: message role: user status: completed - seq: 3 + seq: 5 type: item.completed - item: content_types: @@ -31,13 +39,13 @@ session_a: kind: message role: assistant status: in_progress - seq: 4 + seq: 6 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 5 + seq: 7 type: item.delta - item: content_types: @@ -45,22 +53,30 @@ session_a: kind: message role: assistant status: completed - seq: 6 + seq: 8 type: item.completed session_b: - - item: - content_types: - - text - kind: message - role: user - status: in_progress + - metadata: true seq: 1 + session: started + type: session.started + - metadata: true + seq: 2 + session: started + type: session.started + - item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -68,7 +84,7 @@ session_b: kind: message role: user status: completed - seq: 3 + seq: 5 type: item.completed - item: content_types: @@ -76,13 +92,13 @@ session_b: kind: message role: assistant status: in_progress - seq: 4 + seq: 6 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 5 + seq: 7 type: item.delta - item: content_types: @@ -90,5 +106,5 @@ session_b: kind: message role: assistant status: completed - seq: 6 + seq: 8 type: item.completed diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap index 0a6a9d0..57a0cfb 100644 --- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap @@ -2,19 +2,27 @@ source: server/packages/sandbox-agent/tests/sessions/../common/http.rs expression: normalized --- -- item: - content_types: - - text - kind: message - role: user - status: in_progress +- metadata: true seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -22,7 +30,7 @@ expression: normalized kind: message role: user status: completed - seq: 3 + seq: 5 type: item.completed - item: content_types: @@ -30,13 +38,13 @@ expression: normalized kind: message role: assistant status: in_progress - seq: 4 + seq: 6 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 5 + seq: 7 type: item.delta - item: content_types: @@ -44,5 +52,5 @@ expression: normalized kind: message role: assistant status: completed - seq: 6 + seq: 8 type: item.completed diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap index 0a6a9d0..57a0cfb 100644 --- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap @@ -2,19 +2,27 @@ source: server/packages/sandbox-agent/tests/sessions/../common/http.rs expression: normalized --- -- item: - content_types: - - text - kind: message - role: user - status: in_progress +- metadata: true seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 3 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 2 + seq: 4 type: item.delta - item: content_types: @@ -22,7 +30,7 @@ expression: normalized kind: message role: user status: completed - seq: 3 + seq: 5 type: item.completed - item: content_types: @@ -30,13 +38,13 @@ expression: normalized kind: message role: assistant status: in_progress - seq: 4 + seq: 6 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 5 + seq: 7 type: item.delta - item: content_types: @@ -44,5 +52,5 @@ expression: normalized kind: message role: assistant status: completed - seq: 6 + seq: 8 type: item.completed diff --git a/server/packages/sandbox-agent/tests/sessions/status.rs b/server/packages/sandbox-agent/tests/sessions/status.rs index c2e0389..ce2faae 100644 --- a/server/packages/sandbox-agent/tests/sessions/status.rs +++ b/server/packages/sandbox-agent/tests/sessions/status.rs @@ -1,12 +1,8 @@ // Status capability checks are isolated from baseline snapshots. include!("../common/http.rs"); -fn status_prompt(agent: AgentId) -> &'static str { - if agent == AgentId::Mock { - "status" - } else { - "Provide a short status update." - } +fn status_prompt(_agent: AgentId) -> &'static str { + "Provide a short status update." } fn events_have_status(events: &[Value]) -> bool { @@ -34,7 +30,6 @@ async fn status_events_present() { let session_id = format!("status-{}", config.agent.as_str()); create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent)) .await; - let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await; let status = send_status( &app.app, Method::POST, @@ -44,10 +39,9 @@ async fn status_events_present() { .await; assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt"); - let events = poll_events_until_match_from( + let events = poll_events_until_match( &app.app, &session_id, - offset, Duration::from_secs(120), |events| events_have_status(events) || events.iter().any(is_error_event), )