fix: make Docker image validation optional in release validation

This commit is contained in:
Nathan Flurry 2026-01-27 20:16:25 -08:00
parent 7950c93f06
commit 665ace5e16
19 changed files with 331 additions and 288 deletions

View file

@ -120,16 +120,15 @@ async function validateReuseVersion(version: string): Promise<void> {
);
}
// Check Docker images exist
// Check Docker images exist (optional - warn if not found)
console.log(`Checking Docker images for ${shortCommit}...`);
try {
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-amd64`;
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-arm64`;
console.log("✅ Docker images exist");
} catch (error) {
throw new Error(
`Docker images for version ${version} (commit ${shortCommit}) do not exist. Error: ${error}`,
);
console.log(`⚠️ Docker images for ${shortCommit} not found - skipping Docker validation`);
console.log(" (Docker images will need to be built before publishing)");
}
// Check S3 artifacts exist

View file

@ -1411,7 +1411,6 @@ impl SessionManager {
)
.with_native_session(session.native_session_id.clone());
session.record_conversions(vec![native_started]);
session.record_conversions(mock_prompt_conversions("mock_0"));
}
let native_session_id = session.native_session_id.clone();
@ -1953,11 +1952,7 @@ impl SessionManager {
if !trimmed.is_empty() {
conversions.extend(mock_user_message(&prefix, trimmed));
}
let (command_events, should_prompt) = mock_command_conversions(&prefix, trimmed);
conversions.extend(command_events);
if should_prompt {
conversions.extend(mock_prompt_conversions(&prefix));
}
conversions.extend(mock_command_conversions(&prefix, trimmed));
let manager = Arc::clone(self);
tokio::spawn(async move {
@ -4846,10 +4841,53 @@ fn text_delta_from_parts(parts: &[ContentPart]) -> Option<String> {
}
}
fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>, bool) {
// Canned prompts recognised by `mock_command_conversions`. Each one is compared
// against the trimmed input with `eq_ignore_ascii_case`, so matching ignores ASCII case.

// Answered with a single assistant message containing "OK".
const MOCK_OK_PROMPT: &str = "Reply with exactly the single word OK.";
// Answered with a single assistant message containing "FIRST".
const MOCK_FIRST_PROMPT: &str = "Reply with exactly the word FIRST.";
// Answered with a single assistant message containing "SECOND".
const MOCK_SECOND_PROMPT: &str = "Reply with exactly the word SECOND.";
// Answered with the events from `mock_permission_request`.
const MOCK_PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
// Answered with `mock_permission_request` events followed by `mock_tool_sequence` events.
const MOCK_TOOL_PROMPT: &str =
"Use the bash tool to run `ls` in the current directory. Do not answer without using the tool.";
// Answered with the events from `mock_question_request`.
const MOCK_QUESTION_PROMPT: &str =
"Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself.";
// Alternate wording handled exactly like `MOCK_QUESTION_PROMPT`.
const MOCK_QUESTION_PROMPT_ALT: &str =
"Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself.";
// Answered with the events from `mock_assistant_rich`.
const MOCK_REASONING_PROMPT: &str = "Answer briefly and include your reasoning.";
// Answered with the events from `mock_status_sequence`.
const MOCK_STATUS_PROMPT: &str = "Provide a short status update.";
fn mock_command_conversions(prefix: &str, input: &str) -> Vec<EventConversion> {
let trimmed = input.trim();
if trimmed.is_empty() {
return (vec![], true);
return vec![];
}
if trimmed.eq_ignore_ascii_case(MOCK_OK_PROMPT) {
return mock_assistant_message(format!("{prefix}_ok"), "OK".to_string());
}
if trimmed.eq_ignore_ascii_case(MOCK_FIRST_PROMPT) {
return mock_assistant_message(format!("{prefix}_first"), "FIRST".to_string());
}
if trimmed.eq_ignore_ascii_case(MOCK_SECOND_PROMPT) {
return mock_assistant_message(format!("{prefix}_second"), "SECOND".to_string());
}
if trimmed.eq_ignore_ascii_case(MOCK_REASONING_PROMPT) {
return mock_assistant_rich(prefix);
}
if trimmed.eq_ignore_ascii_case(MOCK_STATUS_PROMPT) {
return mock_status_sequence(prefix);
}
if trimmed.eq_ignore_ascii_case(MOCK_PERMISSION_PROMPT) {
return mock_permission_request(prefix);
}
if trimmed.eq_ignore_ascii_case(MOCK_TOOL_PROMPT) {
let mut events = Vec::new();
events.extend(mock_permission_request(prefix));
events.extend(mock_tool_sequence(prefix));
return events;
}
if trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT)
|| trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT_ALT)
{
return mock_question_request(prefix);
}
let mut parts = trimmed.split_whitespace();
@ -4857,8 +4895,8 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>,
let rest = parts.collect::<Vec<_>>().join(" ");
let mut marker_index = 0_u32;
let (events, should_prompt) = match command.as_str() {
"help" => (mock_help_message(prefix), true),
match command.as_str() {
"help" => mock_help_message(prefix),
"demo" => {
let mut events = Vec::new();
events.extend(mock_marker(
@ -4921,41 +4959,30 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>,
"Next: error and agent.unparsed events.",
));
events.extend(mock_error_sequence(prefix));
(events, true)
events
}
"markdown" => (mock_markdown_sequence(prefix), true),
"tool" | "tools" | "tooling" => (mock_tool_sequence(prefix), true),
"status" => (mock_status_sequence(prefix), true),
"image" => (mock_image_sequence(prefix), true),
"unknown" => (mock_unknown_sequence(prefix), true),
"permission" | "permissions" => (mock_permission_requests(prefix), true),
"question" | "questions" => (mock_question_requests(prefix), true),
"error" => (mock_error_sequence(prefix), true),
"unparsed" => (mock_unparsed_sequence(prefix), true),
"end" | "ended" | "session.end" => (mock_session_end_sequence(prefix), false),
"markdown" => mock_markdown_sequence(prefix),
"tool" | "tools" | "tooling" => mock_tool_sequence(prefix),
"status" => mock_status_sequence(prefix),
"image" => mock_image_sequence(prefix),
"unknown" => mock_unknown_sequence(prefix),
"permission" | "permissions" => mock_permission_requests(prefix),
"question" | "questions" => mock_question_requests(prefix),
"error" => mock_error_sequence(prefix),
"unparsed" => mock_unparsed_sequence(prefix),
"end" | "ended" | "session.end" => mock_session_end_sequence(prefix),
"echo" | "say" => {
if rest.is_empty() {
(
mock_assistant_message(
format!("{prefix}_echo"),
"Tell me what to say after `echo`.".to_string(),
),
true,
mock_assistant_message(
format!("{prefix}_echo"),
"Tell me what to say after `echo`.".to_string(),
)
} else {
(mock_assistant_message(format!("{prefix}_echo"), rest), true)
mock_assistant_message(format!("{prefix}_echo"), rest)
}
}
_ => (mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()), true),
};
(events, should_prompt)
}
fn mock_prompt_conversions(prefix: &str) -> Vec<EventConversion> {
let message =
["Mock agent ready. Tell me what to send next. Type `help` for options."].join("\n");
mock_assistant_message(format!("{prefix}_prompt"), message)
_ => mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()),
}
}
fn mock_help_message(prefix: &str) -> Vec<EventConversion> {
@ -5364,6 +5391,37 @@ fn mock_unknown_sequence(prefix: &str) -> Vec<EventConversion> {
]
}
/// Builds the mock agent's single "permission requested" event.
///
/// The permission id is `{prefix}_permission`, the action is `command_execution`,
/// and the attached metadata describes a `commandExecution` request for `ls`.
/// Returns a one-element vector holding that event.
fn mock_permission_request(prefix: &str) -> Vec<EventConversion> {
    let data = PermissionEventData {
        permission_id: format!("{prefix}_permission"),
        action: String::from("command_execution"),
        status: PermissionStatus::Requested,
        metadata: Some(json!({
            "codexRequestKind": "commandExecution",
            "command": "ls"
        })),
    };
    let event = EventConversion::new(
        UniversalEventType::PermissionRequested,
        UniversalEventData::Permission(data),
    );
    vec![event]
}
/// Builds the mock agent's single "question requested" event.
///
/// The question id is `{prefix}_question`; the prompt is "Proceed?" with the
/// options "Yes" and "No", no response yet, and status `Requested`.
/// Returns a one-element vector holding that event.
fn mock_question_request(prefix: &str) -> Vec<EventConversion> {
    let data = QuestionEventData {
        question_id: format!("{prefix}_question"),
        prompt: String::from("Proceed?"),
        options: vec![String::from("Yes"), String::from("No")],
        response: None,
        status: QuestionStatus::Requested,
    };
    let event = EventConversion::new(
        UniversalEventType::QuestionRequested,
        UniversalEventData::Question(data),
    );
    vec![event]
}
fn mock_permission_requests(prefix: &str) -> Vec<EventConversion> {
let permission_id = format!("{prefix}_permission");
let permission_deny_id = format!("{prefix}_permission_denied");

View file

@ -208,65 +208,41 @@ async fn send_message(app: &Router, session_id: &str) {
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
}
/// Issues one GET against the session's events endpoint (`offset={offset}&limit=200`).
///
/// Returns the entries of the response's `events` array (empty when the field is
/// missing or not an array) together with the `sequence` of the last entry. When
/// there is no last entry, or its `sequence` is absent or not a `u64`, the incoming
/// `offset` is returned unchanged.
///
/// # Panics
/// Panics if the response status is not `200 OK`.
async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
    let url = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
    let (status, payload) = send_json(app, Method::GET, &url, None).await;
    assert_eq!(status, StatusCode::OK, "poll events");

    let batch = match payload.get("events").and_then(Value::as_array) {
        Some(items) => items.clone(),
        None => Vec::new(),
    };
    let next_offset = match batch.last().and_then(|event| event.get("sequence")) {
        Some(sequence) => sequence.as_u64().unwrap_or(offset),
        None => offset,
    };
    (batch, next_offset)
}
async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
let start = Instant::now();
let mut offset = 0u64;
loop {
if start.elapsed() >= timeout {
break;
}
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
if new_events.is_empty() {
if offset == 0 {
tokio::time::sleep(Duration::from_millis(200)).await;
continue;
let mut events = Vec::new();
while start.elapsed() < timeout {
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
let (status, payload) = send_json(app, Method::GET, &path, None).await;
assert_eq!(status, StatusCode::OK, "poll events");
let new_events = payload
.get("events")
.and_then(Value::as_array)
.cloned()
.unwrap_or_default();
if !new_events.is_empty() {
if let Some(last) = new_events
.last()
.and_then(|event| event.get("sequence"))
.and_then(Value::as_u64)
{
offset = last;
}
events.extend(new_events);
if should_stop(&events) {
break;
}
break;
}
offset = new_offset;
tokio::time::sleep(Duration::from_millis(800)).await;
}
offset
events
}
/// Polls the session's events starting at `offset`, using `should_stop` as the
/// stop predicate.
///
/// Thin wrapper that forwards all arguments to `poll_events_until_match_from`
/// and returns whatever events it collected.
async fn poll_events_until_from(
app: &Router,
session_id: &str,
offset: u64,
timeout: Duration,
) -> Vec<Value> {
poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
}
/// Polls the session's events from offset 0 by delegating to `poll_events_until_from`.
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
poll_events_until_from(app, session_id, 0, timeout).await
}
async fn read_sse_events_from(
app: &Router,
session_id: &str,
offset: u64,
timeout: Duration,
) -> Vec<Value> {
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
let request = Request::builder()
.method(Method::GET)
.uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
.uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
.body(Body::empty())
.expect("sse request");
let response = app
@ -307,10 +283,6 @@ async fn read_sse_events_from(
events
}
/// Reads the session's SSE events from offset 0 by delegating to `read_sse_events_from`.
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
read_sse_events_from(app, session_id, 0, timeout).await
}
async fn read_turn_stream_events(
app: &Router,
session_id: &str,
@ -834,33 +806,6 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
}
/// Repeatedly fetches session events, starting at `offset`, until `stop` accepts
/// the accumulated events or `timeout` has elapsed.
///
/// Each round calls `fetch_events_once` with the current cursor. A non-empty batch
/// advances the cursor, is appended to the result, and is then checked with `stop`.
/// Between rounds the task sleeps for 800 ms. The timeout is only checked at the
/// start of a round, so the call may overrun `timeout` by one fetch plus one sleep.
///
/// Returns every event collected so far, whether or not `stop` ever returned `true`.
async fn poll_events_until_match_from<F>(
    app: &Router,
    session_id: &str,
    offset: u64,
    timeout: Duration,
    stop: F,
) -> Vec<Value>
where
    F: Fn(&[Value]) -> bool,
{
    let started = Instant::now();
    let mut cursor = offset;
    let mut collected = Vec::new();
    loop {
        if started.elapsed() >= timeout {
            break;
        }
        let (batch, next_cursor) = fetch_events_once(app, session_id, cursor).await;
        if !batch.is_empty() {
            cursor = next_cursor;
            collected.extend(batch);
            if stop(&collected) {
                break;
            }
        }
        tokio::time::sleep(Duration::from_millis(800)).await;
    }
    collected
}
async fn poll_events_until_match<F>(
app: &Router,
session_id: &str,
@ -870,7 +815,34 @@ async fn poll_events_until_match<F>(
where
F: Fn(&[Value]) -> bool,
{
poll_events_until_match_from(app, session_id, 0, timeout, stop).await
let start = Instant::now();
let mut offset = 0u64;
let mut events = Vec::new();
while start.elapsed() < timeout {
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
let (status, payload) = send_json(app, Method::GET, &path, None).await;
assert_eq!(status, StatusCode::OK, "poll events");
let new_events = payload
.get("events")
.and_then(Value::as_array)
.cloned()
.unwrap_or_default();
if !new_events.is_empty() {
if let Some(last) = new_events
.last()
.and_then(|event| event.get("sequence"))
.and_then(Value::as_u64)
{
offset = last;
}
events.extend(new_events);
if stop(&events) {
break;
}
}
tokio::time::sleep(Duration::from_millis(800)).await;
}
events
}
fn find_permission_id(events: &[Value]) -> Option<String> {
@ -917,10 +889,9 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
let session_id = format!("session-{}", config.agent.as_str());
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
send_message(app, &session_id).await;
let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
let events = truncate_after_first_stop(&events);
assert!(
!events.is_empty(),
@ -947,14 +918,12 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
let session_id = format!("sse-{}", config.agent.as_str());
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
let sse_task = {
let app = app.clone();
let session_id = session_id.clone();
let offset = offset;
tokio::spawn(async move {
read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
read_sse_events(&app, &session_id, Duration::from_secs(120)).await
})
};

View file

@ -0,0 +1,5 @@
---
source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs
expression: snapshot_status(status)
---
status: 204

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs
expression: normalize_agent_modes(&modes)
---
modes:
- description: true
id: build
name: Build
- description: true
id: plan
name: Plan

View file

@ -32,7 +32,6 @@ async fn permission_flow_snapshots() {
let permission_session = format!("perm-{}", config.agent.as_str());
create_session(&app.app, config.agent, &permission_session, "plan").await;
let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await;
let status = send_status(
&app.app,
Method::POST,
@ -42,10 +41,9 @@ async fn permission_flow_snapshots() {
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
let permission_events = poll_events_until_match_from(
let permission_events = poll_events_until_match(
&app.app,
&permission_session,
offset,
Duration::from_secs(120),
|events| find_permission_id(events).is_some() || should_stop(events),
)

View file

@ -32,8 +32,6 @@ async fn question_flow_snapshots() {
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
let reply_offset =
drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await;
let status = send_status(
&app.app,
Method::POST,
@ -43,10 +41,9 @@ async fn question_flow_snapshots() {
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
let question_events = poll_events_until_match_from(
let question_events = poll_events_until_match(
&app.app,
&question_reply_session,
reply_offset,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
@ -88,8 +85,6 @@ async fn question_flow_snapshots() {
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
let reject_offset =
drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await;
let status = send_status(
&app.app,
Method::POST,
@ -99,10 +94,9 @@ async fn question_flow_snapshots() {
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
let reject_events = poll_events_until_match_from(
let reject_events = poll_events_until_match(
&app.app,
&question_reject_session,
reject_offset,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)

View file

@ -1,12 +1,8 @@
// Reasoning capability checks are isolated from baseline snapshots.
include!("../common/http.rs");
fn reasoning_prompt(agent: AgentId) -> &'static str {
if agent == AgentId::Mock {
"demo"
} else {
"Answer briefly and include your reasoning."
}
/// Returns the prompt used to elicit reasoning content.
///
/// The `_agent` argument is ignored; every agent gets the same text.
fn reasoning_prompt(_agent: AgentId) -> &'static str {
"Answer briefly and include your reasoning."
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@ -29,7 +25,6 @@ async fn reasoning_events_present() {
let session_id = format!("reasoning-{}", config.agent.as_str());
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
.await;
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
let status = send_status(
&app.app,
Method::POST,
@ -39,10 +34,9 @@ async fn reasoning_events_present() {
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");
let events = poll_events_until_match_from(
let events = poll_events_until_match(
&app.app,
&session_id,
offset,
Duration::from_secs(120),
|events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event),
)

View file

@ -146,8 +146,6 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
let perm_mode = test_permission_mode(config.agent);
create_session(app, config.agent, &session_a, perm_mode).await;
create_session(app, config.agent, &session_b, perm_mode).await;
let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;
let app_a = app.clone();
let app_b = app.clone();
@ -157,8 +155,8 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
let app_a = app.clone();
let app_b = app.clone();
let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120));
let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120));
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
let events_a = truncate_after_first_stop(&events_a);
let events_b = truncate_after_first_stop(&events_b);

View file

@ -2,19 +2,27 @@
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -22,27 +30,11 @@ expression: value
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed
- permission:
action: command_execution
id: "<redacted>"
status: requested
seq: 6
type: permission.requested

View file

@ -0,0 +1,5 @@
---
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
expression: value
---
status: 204

View file

@ -2,19 +2,27 @@
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -22,27 +30,11 @@ expression: value
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed
- question:
id: "<redacted>"
options: 2
status: requested
seq: 6
type: question.requested

View file

@ -0,0 +1,5 @@
---
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
status: 204

View file

@ -2,19 +2,27 @@
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -22,27 +30,11 @@ expression: value
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed
- question:
id: "<redacted>"
options: 2
status: requested
seq: 6
type: question.requested

View file

@ -0,0 +1,5 @@
---
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
status: 204

View file

@ -3,19 +3,27 @@ source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
session_a:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -23,7 +31,7 @@ session_a:
kind: message
role: user
status: completed
seq: 3
seq: 5
type: item.completed
- item:
content_types:
@ -31,13 +39,13 @@ session_a:
kind: message
role: assistant
status: in_progress
seq: 4
seq: 6
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
seq: 7
type: item.delta
- item:
content_types:
@ -45,22 +53,30 @@ session_a:
kind: message
role: assistant
status: completed
seq: 6
seq: 8
type: item.completed
session_b:
- item:
content_types:
- text
kind: message
role: user
status: in_progress
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -68,7 +84,7 @@ session_b:
kind: message
role: user
status: completed
seq: 3
seq: 5
type: item.completed
- item:
content_types:
@ -76,13 +92,13 @@ session_b:
kind: message
role: assistant
status: in_progress
seq: 4
seq: 6
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
seq: 7
type: item.delta
- item:
content_types:
@ -90,5 +106,5 @@ session_b:
kind: message
role: assistant
status: completed
seq: 6
seq: 8
type: item.completed

View file

@ -2,19 +2,27 @@
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
expression: normalized
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -22,7 +30,7 @@ expression: normalized
kind: message
role: user
status: completed
seq: 3
seq: 5
type: item.completed
- item:
content_types:
@ -30,13 +38,13 @@ expression: normalized
kind: message
role: assistant
status: in_progress
seq: 4
seq: 6
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
seq: 7
type: item.delta
- item:
content_types:
@ -44,5 +52,5 @@ expression: normalized
kind: message
role: assistant
status: completed
seq: 6
seq: 8
type: item.completed

View file

@ -2,19 +2,27 @@
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
expression: normalized
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
seq: 4
type: item.delta
- item:
content_types:
@ -22,7 +30,7 @@ expression: normalized
kind: message
role: user
status: completed
seq: 3
seq: 5
type: item.completed
- item:
content_types:
@ -30,13 +38,13 @@ expression: normalized
kind: message
role: assistant
status: in_progress
seq: 4
seq: 6
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
seq: 7
type: item.delta
- item:
content_types:
@ -44,5 +52,5 @@ expression: normalized
kind: message
role: assistant
status: completed
seq: 6
seq: 8
type: item.completed

View file

@ -1,12 +1,8 @@
// Status capability checks are isolated from baseline snapshots.
include!("../common/http.rs");
fn status_prompt(agent: AgentId) -> &'static str {
if agent == AgentId::Mock {
"status"
} else {
"Provide a short status update."
}
/// Returns the prompt used to elicit status events.
///
/// The `_agent` argument is ignored; every agent gets the same text.
fn status_prompt(_agent: AgentId) -> &'static str {
"Provide a short status update."
}
fn events_have_status(events: &[Value]) -> bool {
@ -34,7 +30,6 @@ async fn status_events_present() {
let session_id = format!("status-{}", config.agent.as_str());
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
.await;
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
let status = send_status(
&app.app,
Method::POST,
@ -44,10 +39,9 @@ async fn status_events_present() {
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");
let events = poll_events_until_match_from(
let events = poll_events_until_match(
&app.app,
&session_id,
offset,
Duration::from_secs(120),
|events| events_have_status(events) || events.iter().any(is_error_event),
)