mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 09:01:17 +00:00
fix: make Docker image validation optional in release validation
This commit is contained in:
parent
7950c93f06
commit
665ace5e16
19 changed files with 331 additions and 288 deletions
|
|
@ -120,16 +120,15 @@ async function validateReuseVersion(version: string): Promise<void> {
|
|||
);
|
||||
}
|
||||
|
||||
// Check Docker images exist
|
||||
// Check Docker images exist (optional - warn if not found)
|
||||
console.log(`Checking Docker images for ${shortCommit}...`);
|
||||
try {
|
||||
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-amd64`;
|
||||
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-arm64`;
|
||||
console.log("✅ Docker images exist");
|
||||
} catch (error) {
|
||||
throw new Error(
|
||||
`Docker images for version ${version} (commit ${shortCommit}) do not exist. Error: ${error}`,
|
||||
);
|
||||
console.log(`⚠️ Docker images for ${shortCommit} not found - skipping Docker validation`);
|
||||
console.log(" (Docker images will need to be built before publishing)");
|
||||
}
|
||||
|
||||
// Check S3 artifacts exist
|
||||
|
|
|
|||
|
|
@ -1411,7 +1411,6 @@ impl SessionManager {
|
|||
)
|
||||
.with_native_session(session.native_session_id.clone());
|
||||
session.record_conversions(vec![native_started]);
|
||||
session.record_conversions(mock_prompt_conversions("mock_0"));
|
||||
}
|
||||
|
||||
let native_session_id = session.native_session_id.clone();
|
||||
|
|
@ -1953,11 +1952,7 @@ impl SessionManager {
|
|||
if !trimmed.is_empty() {
|
||||
conversions.extend(mock_user_message(&prefix, trimmed));
|
||||
}
|
||||
let (command_events, should_prompt) = mock_command_conversions(&prefix, trimmed);
|
||||
conversions.extend(command_events);
|
||||
if should_prompt {
|
||||
conversions.extend(mock_prompt_conversions(&prefix));
|
||||
}
|
||||
conversions.extend(mock_command_conversions(&prefix, trimmed));
|
||||
|
||||
let manager = Arc::clone(self);
|
||||
tokio::spawn(async move {
|
||||
|
|
@ -4846,10 +4841,53 @@ fn text_delta_from_parts(parts: &[ContentPart]) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>, bool) {
|
||||
const MOCK_OK_PROMPT: &str = "Reply with exactly the single word OK.";
|
||||
const MOCK_FIRST_PROMPT: &str = "Reply with exactly the word FIRST.";
|
||||
const MOCK_SECOND_PROMPT: &str = "Reply with exactly the word SECOND.";
|
||||
const MOCK_PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
|
||||
const MOCK_TOOL_PROMPT: &str =
|
||||
"Use the bash tool to run `ls` in the current directory. Do not answer without using the tool.";
|
||||
const MOCK_QUESTION_PROMPT: &str =
|
||||
"Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself.";
|
||||
const MOCK_QUESTION_PROMPT_ALT: &str =
|
||||
"Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself.";
|
||||
const MOCK_REASONING_PROMPT: &str = "Answer briefly and include your reasoning.";
|
||||
const MOCK_STATUS_PROMPT: &str = "Provide a short status update.";
|
||||
|
||||
fn mock_command_conversions(prefix: &str, input: &str) -> Vec<EventConversion> {
|
||||
let trimmed = input.trim();
|
||||
if trimmed.is_empty() {
|
||||
return (vec![], true);
|
||||
return vec![];
|
||||
}
|
||||
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_OK_PROMPT) {
|
||||
return mock_assistant_message(format!("{prefix}_ok"), "OK".to_string());
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_FIRST_PROMPT) {
|
||||
return mock_assistant_message(format!("{prefix}_first"), "FIRST".to_string());
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_SECOND_PROMPT) {
|
||||
return mock_assistant_message(format!("{prefix}_second"), "SECOND".to_string());
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_REASONING_PROMPT) {
|
||||
return mock_assistant_rich(prefix);
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_STATUS_PROMPT) {
|
||||
return mock_status_sequence(prefix);
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_PERMISSION_PROMPT) {
|
||||
return mock_permission_request(prefix);
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_TOOL_PROMPT) {
|
||||
let mut events = Vec::new();
|
||||
events.extend(mock_permission_request(prefix));
|
||||
events.extend(mock_tool_sequence(prefix));
|
||||
return events;
|
||||
}
|
||||
if trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT)
|
||||
|| trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT_ALT)
|
||||
{
|
||||
return mock_question_request(prefix);
|
||||
}
|
||||
|
||||
let mut parts = trimmed.split_whitespace();
|
||||
|
|
@ -4857,8 +4895,8 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>,
|
|||
let rest = parts.collect::<Vec<_>>().join(" ");
|
||||
|
||||
let mut marker_index = 0_u32;
|
||||
let (events, should_prompt) = match command.as_str() {
|
||||
"help" => (mock_help_message(prefix), true),
|
||||
match command.as_str() {
|
||||
"help" => mock_help_message(prefix),
|
||||
"demo" => {
|
||||
let mut events = Vec::new();
|
||||
events.extend(mock_marker(
|
||||
|
|
@ -4921,41 +4959,30 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>,
|
|||
"Next: error and agent.unparsed events.",
|
||||
));
|
||||
events.extend(mock_error_sequence(prefix));
|
||||
(events, true)
|
||||
events
|
||||
}
|
||||
"markdown" => (mock_markdown_sequence(prefix), true),
|
||||
"tool" | "tools" | "tooling" => (mock_tool_sequence(prefix), true),
|
||||
"status" => (mock_status_sequence(prefix), true),
|
||||
"image" => (mock_image_sequence(prefix), true),
|
||||
"unknown" => (mock_unknown_sequence(prefix), true),
|
||||
"permission" | "permissions" => (mock_permission_requests(prefix), true),
|
||||
"question" | "questions" => (mock_question_requests(prefix), true),
|
||||
"error" => (mock_error_sequence(prefix), true),
|
||||
"unparsed" => (mock_unparsed_sequence(prefix), true),
|
||||
"end" | "ended" | "session.end" => (mock_session_end_sequence(prefix), false),
|
||||
"markdown" => mock_markdown_sequence(prefix),
|
||||
"tool" | "tools" | "tooling" => mock_tool_sequence(prefix),
|
||||
"status" => mock_status_sequence(prefix),
|
||||
"image" => mock_image_sequence(prefix),
|
||||
"unknown" => mock_unknown_sequence(prefix),
|
||||
"permission" | "permissions" => mock_permission_requests(prefix),
|
||||
"question" | "questions" => mock_question_requests(prefix),
|
||||
"error" => mock_error_sequence(prefix),
|
||||
"unparsed" => mock_unparsed_sequence(prefix),
|
||||
"end" | "ended" | "session.end" => mock_session_end_sequence(prefix),
|
||||
"echo" | "say" => {
|
||||
if rest.is_empty() {
|
||||
(
|
||||
mock_assistant_message(
|
||||
format!("{prefix}_echo"),
|
||||
"Tell me what to say after `echo`.".to_string(),
|
||||
),
|
||||
true,
|
||||
mock_assistant_message(
|
||||
format!("{prefix}_echo"),
|
||||
"Tell me what to say after `echo`.".to_string(),
|
||||
)
|
||||
} else {
|
||||
(mock_assistant_message(format!("{prefix}_echo"), rest), true)
|
||||
mock_assistant_message(format!("{prefix}_echo"), rest)
|
||||
}
|
||||
}
|
||||
_ => (mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()), true),
|
||||
};
|
||||
|
||||
(events, should_prompt)
|
||||
}
|
||||
|
||||
fn mock_prompt_conversions(prefix: &str) -> Vec<EventConversion> {
|
||||
let message =
|
||||
["Mock agent ready. Tell me what to send next. Type `help` for options."].join("\n");
|
||||
mock_assistant_message(format!("{prefix}_prompt"), message)
|
||||
_ => mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn mock_help_message(prefix: &str) -> Vec<EventConversion> {
|
||||
|
|
@ -5364,6 +5391,37 @@ fn mock_unknown_sequence(prefix: &str) -> Vec<EventConversion> {
|
|||
]
|
||||
}
|
||||
|
||||
fn mock_permission_request(prefix: &str) -> Vec<EventConversion> {
|
||||
let permission_id = format!("{prefix}_permission");
|
||||
let metadata = json!({
|
||||
"codexRequestKind": "commandExecution",
|
||||
"command": "ls"
|
||||
});
|
||||
vec![EventConversion::new(
|
||||
UniversalEventType::PermissionRequested,
|
||||
UniversalEventData::Permission(PermissionEventData {
|
||||
permission_id,
|
||||
action: "command_execution".to_string(),
|
||||
status: PermissionStatus::Requested,
|
||||
metadata: Some(metadata),
|
||||
}),
|
||||
)]
|
||||
}
|
||||
|
||||
fn mock_question_request(prefix: &str) -> Vec<EventConversion> {
|
||||
let question_id = format!("{prefix}_question");
|
||||
vec![EventConversion::new(
|
||||
UniversalEventType::QuestionRequested,
|
||||
UniversalEventData::Question(QuestionEventData {
|
||||
question_id,
|
||||
prompt: "Proceed?".to_string(),
|
||||
options: vec!["Yes".to_string(), "No".to_string()],
|
||||
response: None,
|
||||
status: QuestionStatus::Requested,
|
||||
}),
|
||||
)]
|
||||
}
|
||||
|
||||
fn mock_permission_requests(prefix: &str) -> Vec<EventConversion> {
|
||||
let permission_id = format!("{prefix}_permission");
|
||||
let permission_deny_id = format!("{prefix}_permission_denied");
|
||||
|
|
|
|||
|
|
@ -208,65 +208,41 @@ async fn send_message(app: &Router, session_id: &str) {
|
|||
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
||||
}
|
||||
|
||||
async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
let new_offset = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
.unwrap_or(offset);
|
||||
(new_events, new_offset)
|
||||
}
|
||||
|
||||
async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
|
||||
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
loop {
|
||||
if start.elapsed() >= timeout {
|
||||
break;
|
||||
}
|
||||
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
|
||||
if new_events.is_empty() {
|
||||
if offset == 0 {
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
continue;
|
||||
let mut events = Vec::new();
|
||||
while start.elapsed() < timeout {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if should_stop(&events) {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
offset = new_offset;
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
offset
|
||||
events
|
||||
}
|
||||
|
||||
async fn poll_events_until_from(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
offset: u64,
|
||||
timeout: Duration,
|
||||
) -> Vec<Value> {
|
||||
poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
|
||||
}
|
||||
|
||||
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||
poll_events_until_from(app, session_id, 0, timeout).await
|
||||
}
|
||||
|
||||
async fn read_sse_events_from(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
offset: u64,
|
||||
timeout: Duration,
|
||||
) -> Vec<Value> {
|
||||
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
|
||||
.uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
|
||||
.body(Body::empty())
|
||||
.expect("sse request");
|
||||
let response = app
|
||||
|
|
@ -307,10 +283,6 @@ async fn read_sse_events_from(
|
|||
events
|
||||
}
|
||||
|
||||
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||
read_sse_events_from(app, session_id, 0, timeout).await
|
||||
}
|
||||
|
||||
async fn read_turn_stream_events(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
|
|
@ -834,33 +806,6 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
|
|||
}
|
||||
|
||||
|
||||
async fn poll_events_until_match_from<F>(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
offset: u64,
|
||||
timeout: Duration,
|
||||
stop: F,
|
||||
) -> Vec<Value>
|
||||
where
|
||||
F: Fn(&[Value]) -> bool,
|
||||
{
|
||||
let start = Instant::now();
|
||||
let mut offset = offset;
|
||||
let mut events = Vec::new();
|
||||
while start.elapsed() < timeout {
|
||||
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
|
||||
if !new_events.is_empty() {
|
||||
offset = new_offset;
|
||||
events.extend(new_events);
|
||||
if stop(&events) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
async fn poll_events_until_match<F>(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
|
|
@ -870,7 +815,34 @@ async fn poll_events_until_match<F>(
|
|||
where
|
||||
F: Fn(&[Value]) -> bool,
|
||||
{
|
||||
poll_events_until_match_from(app, session_id, 0, timeout, stop).await
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
let mut events = Vec::new();
|
||||
while start.elapsed() < timeout {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if stop(&events) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
fn find_permission_id(events: &[Value]) -> Option<String> {
|
||||
|
|
@ -917,10 +889,9 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
|||
|
||||
let session_id = format!("session-{}", config.agent.as_str());
|
||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
|
||||
send_message(app, &session_id).await;
|
||||
|
||||
let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
|
||||
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
|
||||
let events = truncate_after_first_stop(&events);
|
||||
assert!(
|
||||
!events.is_empty(),
|
||||
|
|
@ -947,14 +918,12 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
|||
|
||||
let session_id = format!("sse-{}", config.agent.as_str());
|
||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
|
||||
|
||||
let sse_task = {
|
||||
let app = app.clone();
|
||||
let session_id = session_id.clone();
|
||||
let offset = offset;
|
||||
tokio::spawn(async move {
|
||||
read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
|
||||
read_sse_events(&app, &session_id, Duration::from_secs(120)).await
|
||||
})
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs
|
||||
expression: snapshot_status(status)
|
||||
---
|
||||
status: 204
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs
|
||||
expression: normalize_agent_modes(&modes)
|
||||
---
|
||||
modes:
|
||||
- description: true
|
||||
id: build
|
||||
name: Build
|
||||
- description: true
|
||||
id: plan
|
||||
name: Plan
|
||||
|
|
@ -32,7 +32,6 @@ async fn permission_flow_snapshots() {
|
|||
|
||||
let permission_session = format!("perm-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &permission_session, "plan").await;
|
||||
let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
@ -42,10 +41,9 @@ async fn permission_flow_snapshots() {
|
|||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
|
||||
|
||||
let permission_events = poll_events_until_match_from(
|
||||
let permission_events = poll_events_until_match(
|
||||
&app.app,
|
||||
&permission_session,
|
||||
offset,
|
||||
Duration::from_secs(120),
|
||||
|events| find_permission_id(events).is_some() || should_stop(events),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -32,8 +32,6 @@ async fn question_flow_snapshots() {
|
|||
|
||||
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
|
||||
let reply_offset =
|
||||
drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
@ -43,10 +41,9 @@ async fn question_flow_snapshots() {
|
|||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
|
||||
|
||||
let question_events = poll_events_until_match_from(
|
||||
let question_events = poll_events_until_match(
|
||||
&app.app,
|
||||
&question_reply_session,
|
||||
reply_offset,
|
||||
Duration::from_secs(120),
|
||||
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
||||
)
|
||||
|
|
@ -88,8 +85,6 @@ async fn question_flow_snapshots() {
|
|||
|
||||
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
|
||||
let reject_offset =
|
||||
drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
@ -99,10 +94,9 @@ async fn question_flow_snapshots() {
|
|||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
|
||||
|
||||
let reject_events = poll_events_until_match_from(
|
||||
let reject_events = poll_events_until_match(
|
||||
&app.app,
|
||||
&question_reject_session,
|
||||
reject_offset,
|
||||
Duration::from_secs(120),
|
||||
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,8 @@
|
|||
// Reasoning capability checks are isolated from baseline snapshots.
|
||||
include!("../common/http.rs");
|
||||
|
||||
fn reasoning_prompt(agent: AgentId) -> &'static str {
|
||||
if agent == AgentId::Mock {
|
||||
"demo"
|
||||
} else {
|
||||
"Answer briefly and include your reasoning."
|
||||
}
|
||||
fn reasoning_prompt(_agent: AgentId) -> &'static str {
|
||||
"Answer briefly and include your reasoning."
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
|
|
@ -29,7 +25,6 @@ async fn reasoning_events_present() {
|
|||
let session_id = format!("reasoning-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
||||
.await;
|
||||
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
@ -39,10 +34,9 @@ async fn reasoning_events_present() {
|
|||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");
|
||||
|
||||
let events = poll_events_until_match_from(
|
||||
let events = poll_events_until_match(
|
||||
&app.app,
|
||||
&session_id,
|
||||
offset,
|
||||
Duration::from_secs(120),
|
||||
|events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -146,8 +146,6 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
|
|||
let perm_mode = test_permission_mode(config.agent);
|
||||
create_session(app, config.agent, &session_a, perm_mode).await;
|
||||
create_session(app, config.agent, &session_b, perm_mode).await;
|
||||
let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
|
||||
let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;
|
||||
|
||||
let app_a = app.clone();
|
||||
let app_b = app.clone();
|
||||
|
|
@ -157,8 +155,8 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
|
|||
|
||||
let app_a = app.clone();
|
||||
let app_b = app.clone();
|
||||
let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120));
|
||||
let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120));
|
||||
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
|
||||
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
|
||||
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
|
||||
let events_a = truncate_after_first_stop(&events_a);
|
||||
let events_b = truncate_after_first_stop(&events_b);
|
||||
|
|
|
|||
|
|
@ -2,19 +2,27 @@
|
|||
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
||||
expression: value
|
||||
---
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -22,27 +30,11 @@ expression: value
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
type: item.completed
|
||||
- permission:
|
||||
action: command_execution
|
||||
id: "<redacted>"
|
||||
status: requested
|
||||
seq: 6
|
||||
type: permission.requested
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
||||
expression: value
|
||||
---
|
||||
status: 204
|
||||
|
|
@ -2,19 +2,27 @@
|
|||
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||
expression: value
|
||||
---
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -22,27 +30,11 @@ expression: value
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
type: item.completed
|
||||
- question:
|
||||
id: "<redacted>"
|
||||
options: 2
|
||||
status: requested
|
||||
seq: 6
|
||||
type: question.requested
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||
expression: value
|
||||
---
|
||||
status: 204
|
||||
|
|
@ -2,19 +2,27 @@
|
|||
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||
expression: value
|
||||
---
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -22,27 +30,11 @@ expression: value
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
type: item.completed
|
||||
- question:
|
||||
id: "<redacted>"
|
||||
options: 2
|
||||
status: requested
|
||||
seq: 6
|
||||
type: question.requested
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||
expression: value
|
||||
---
|
||||
status: 204
|
||||
|
|
@ -3,19 +3,27 @@ source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
|
|||
expression: value
|
||||
---
|
||||
session_a:
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 1
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -23,7 +31,7 @@ session_a:
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
seq: 5
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -31,13 +39,13 @@ session_a:
|
|||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
seq: 6
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
seq: 7
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -45,22 +53,30 @@ session_a:
|
|||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
seq: 8
|
||||
type: item.completed
|
||||
session_b:
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -68,7 +84,7 @@ session_b:
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
seq: 5
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -76,13 +92,13 @@ session_b:
|
|||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
seq: 6
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
seq: 7
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -90,5 +106,5 @@ session_b:
|
|||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
seq: 8
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -2,19 +2,27 @@
|
|||
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
||||
expression: normalized
|
||||
---
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -22,7 +30,7 @@ expression: normalized
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
seq: 5
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -30,13 +38,13 @@ expression: normalized
|
|||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
seq: 6
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
seq: 7
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -44,5 +52,5 @@ expression: normalized
|
|||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
seq: 8
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -2,19 +2,27 @@
|
|||
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
||||
expression: normalized
|
||||
---
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 2
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -22,7 +30,7 @@ expression: normalized
|
|||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 3
|
||||
seq: 5
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -30,13 +38,13 @@ expression: normalized
|
|||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 4
|
||||
seq: 6
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
seq: 7
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -44,5 +52,5 @@ expression: normalized
|
|||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 6
|
||||
seq: 8
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -1,12 +1,8 @@
|
|||
// Status capability checks are isolated from baseline snapshots.
|
||||
include!("../common/http.rs");
|
||||
|
||||
fn status_prompt(agent: AgentId) -> &'static str {
|
||||
if agent == AgentId::Mock {
|
||||
"status"
|
||||
} else {
|
||||
"Provide a short status update."
|
||||
}
|
||||
fn status_prompt(_agent: AgentId) -> &'static str {
|
||||
"Provide a short status update."
|
||||
}
|
||||
|
||||
fn events_have_status(events: &[Value]) -> bool {
|
||||
|
|
@ -34,7 +30,6 @@ async fn status_events_present() {
|
|||
let session_id = format!("status-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
||||
.await;
|
||||
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
@ -44,10 +39,9 @@ async fn status_events_present() {
|
|||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");
|
||||
|
||||
let events = poll_events_until_match_from(
|
||||
let events = poll_events_until_match(
|
||||
&app.app,
|
||||
&session_id,
|
||||
offset,
|
||||
Duration::from_secs(120),
|
||||
|events| events_have_status(events) || events.iter().any(is_error_event),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue