mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-20 01:00:32 +00:00
fix: make Docker image validation optional in release validation
This commit is contained in:
parent
7950c93f06
commit
665ace5e16
19 changed files with 331 additions and 288 deletions
|
|
@ -120,16 +120,15 @@ async function validateReuseVersion(version: string): Promise<void> {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check Docker images exist
|
// Check Docker images exist (optional - warn if not found)
|
||||||
console.log(`Checking Docker images for ${shortCommit}...`);
|
console.log(`Checking Docker images for ${shortCommit}...`);
|
||||||
try {
|
try {
|
||||||
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-amd64`;
|
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-amd64`;
|
||||||
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-arm64`;
|
await $({ stdio: "inherit" })`docker manifest inspect rivetdev/sandbox-agent:${shortCommit}-arm64`;
|
||||||
console.log("✅ Docker images exist");
|
console.log("✅ Docker images exist");
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
throw new Error(
|
console.log(`⚠️ Docker images for ${shortCommit} not found - skipping Docker validation`);
|
||||||
`Docker images for version ${version} (commit ${shortCommit}) do not exist. Error: ${error}`,
|
console.log(" (Docker images will need to be built before publishing)");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check S3 artifacts exist
|
// Check S3 artifacts exist
|
||||||
|
|
|
||||||
|
|
@ -1411,7 +1411,6 @@ impl SessionManager {
|
||||||
)
|
)
|
||||||
.with_native_session(session.native_session_id.clone());
|
.with_native_session(session.native_session_id.clone());
|
||||||
session.record_conversions(vec![native_started]);
|
session.record_conversions(vec![native_started]);
|
||||||
session.record_conversions(mock_prompt_conversions("mock_0"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let native_session_id = session.native_session_id.clone();
|
let native_session_id = session.native_session_id.clone();
|
||||||
|
|
@ -1953,11 +1952,7 @@ impl SessionManager {
|
||||||
if !trimmed.is_empty() {
|
if !trimmed.is_empty() {
|
||||||
conversions.extend(mock_user_message(&prefix, trimmed));
|
conversions.extend(mock_user_message(&prefix, trimmed));
|
||||||
}
|
}
|
||||||
let (command_events, should_prompt) = mock_command_conversions(&prefix, trimmed);
|
conversions.extend(mock_command_conversions(&prefix, trimmed));
|
||||||
conversions.extend(command_events);
|
|
||||||
if should_prompt {
|
|
||||||
conversions.extend(mock_prompt_conversions(&prefix));
|
|
||||||
}
|
|
||||||
|
|
||||||
let manager = Arc::clone(self);
|
let manager = Arc::clone(self);
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
|
|
@ -4846,10 +4841,53 @@ fn text_delta_from_parts(parts: &[ContentPart]) -> Option<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>, bool) {
|
const MOCK_OK_PROMPT: &str = "Reply with exactly the single word OK.";
|
||||||
|
const MOCK_FIRST_PROMPT: &str = "Reply with exactly the word FIRST.";
|
||||||
|
const MOCK_SECOND_PROMPT: &str = "Reply with exactly the word SECOND.";
|
||||||
|
const MOCK_PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
|
||||||
|
const MOCK_TOOL_PROMPT: &str =
|
||||||
|
"Use the bash tool to run `ls` in the current directory. Do not answer without using the tool.";
|
||||||
|
const MOCK_QUESTION_PROMPT: &str =
|
||||||
|
"Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself.";
|
||||||
|
const MOCK_QUESTION_PROMPT_ALT: &str =
|
||||||
|
"Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself.";
|
||||||
|
const MOCK_REASONING_PROMPT: &str = "Answer briefly and include your reasoning.";
|
||||||
|
const MOCK_STATUS_PROMPT: &str = "Provide a short status update.";
|
||||||
|
|
||||||
|
fn mock_command_conversions(prefix: &str, input: &str) -> Vec<EventConversion> {
|
||||||
let trimmed = input.trim();
|
let trimmed = input.trim();
|
||||||
if trimmed.is_empty() {
|
if trimmed.is_empty() {
|
||||||
return (vec![], true);
|
return vec![];
|
||||||
|
}
|
||||||
|
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_OK_PROMPT) {
|
||||||
|
return mock_assistant_message(format!("{prefix}_ok"), "OK".to_string());
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_FIRST_PROMPT) {
|
||||||
|
return mock_assistant_message(format!("{prefix}_first"), "FIRST".to_string());
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_SECOND_PROMPT) {
|
||||||
|
return mock_assistant_message(format!("{prefix}_second"), "SECOND".to_string());
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_REASONING_PROMPT) {
|
||||||
|
return mock_assistant_rich(prefix);
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_STATUS_PROMPT) {
|
||||||
|
return mock_status_sequence(prefix);
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_PERMISSION_PROMPT) {
|
||||||
|
return mock_permission_request(prefix);
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_TOOL_PROMPT) {
|
||||||
|
let mut events = Vec::new();
|
||||||
|
events.extend(mock_permission_request(prefix));
|
||||||
|
events.extend(mock_tool_sequence(prefix));
|
||||||
|
return events;
|
||||||
|
}
|
||||||
|
if trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT)
|
||||||
|
|| trimmed.eq_ignore_ascii_case(MOCK_QUESTION_PROMPT_ALT)
|
||||||
|
{
|
||||||
|
return mock_question_request(prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut parts = trimmed.split_whitespace();
|
let mut parts = trimmed.split_whitespace();
|
||||||
|
|
@ -4857,8 +4895,8 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>,
|
||||||
let rest = parts.collect::<Vec<_>>().join(" ");
|
let rest = parts.collect::<Vec<_>>().join(" ");
|
||||||
|
|
||||||
let mut marker_index = 0_u32;
|
let mut marker_index = 0_u32;
|
||||||
let (events, should_prompt) = match command.as_str() {
|
match command.as_str() {
|
||||||
"help" => (mock_help_message(prefix), true),
|
"help" => mock_help_message(prefix),
|
||||||
"demo" => {
|
"demo" => {
|
||||||
let mut events = Vec::new();
|
let mut events = Vec::new();
|
||||||
events.extend(mock_marker(
|
events.extend(mock_marker(
|
||||||
|
|
@ -4921,41 +4959,30 @@ fn mock_command_conversions(prefix: &str, input: &str) -> (Vec<EventConversion>,
|
||||||
"Next: error and agent.unparsed events.",
|
"Next: error and agent.unparsed events.",
|
||||||
));
|
));
|
||||||
events.extend(mock_error_sequence(prefix));
|
events.extend(mock_error_sequence(prefix));
|
||||||
(events, true)
|
events
|
||||||
}
|
}
|
||||||
"markdown" => (mock_markdown_sequence(prefix), true),
|
"markdown" => mock_markdown_sequence(prefix),
|
||||||
"tool" | "tools" | "tooling" => (mock_tool_sequence(prefix), true),
|
"tool" | "tools" | "tooling" => mock_tool_sequence(prefix),
|
||||||
"status" => (mock_status_sequence(prefix), true),
|
"status" => mock_status_sequence(prefix),
|
||||||
"image" => (mock_image_sequence(prefix), true),
|
"image" => mock_image_sequence(prefix),
|
||||||
"unknown" => (mock_unknown_sequence(prefix), true),
|
"unknown" => mock_unknown_sequence(prefix),
|
||||||
"permission" | "permissions" => (mock_permission_requests(prefix), true),
|
"permission" | "permissions" => mock_permission_requests(prefix),
|
||||||
"question" | "questions" => (mock_question_requests(prefix), true),
|
"question" | "questions" => mock_question_requests(prefix),
|
||||||
"error" => (mock_error_sequence(prefix), true),
|
"error" => mock_error_sequence(prefix),
|
||||||
"unparsed" => (mock_unparsed_sequence(prefix), true),
|
"unparsed" => mock_unparsed_sequence(prefix),
|
||||||
"end" | "ended" | "session.end" => (mock_session_end_sequence(prefix), false),
|
"end" | "ended" | "session.end" => mock_session_end_sequence(prefix),
|
||||||
"echo" | "say" => {
|
"echo" | "say" => {
|
||||||
if rest.is_empty() {
|
if rest.is_empty() {
|
||||||
(
|
mock_assistant_message(
|
||||||
mock_assistant_message(
|
format!("{prefix}_echo"),
|
||||||
format!("{prefix}_echo"),
|
"Tell me what to say after `echo`.".to_string(),
|
||||||
"Tell me what to say after `echo`.".to_string(),
|
|
||||||
),
|
|
||||||
true,
|
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
(mock_assistant_message(format!("{prefix}_echo"), rest), true)
|
mock_assistant_message(format!("{prefix}_echo"), rest)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => (mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()), true),
|
_ => mock_assistant_message(format!("{prefix}_reply"), trimmed.to_string()),
|
||||||
};
|
}
|
||||||
|
|
||||||
(events, should_prompt)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn mock_prompt_conversions(prefix: &str) -> Vec<EventConversion> {
|
|
||||||
let message =
|
|
||||||
["Mock agent ready. Tell me what to send next. Type `help` for options."].join("\n");
|
|
||||||
mock_assistant_message(format!("{prefix}_prompt"), message)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn mock_help_message(prefix: &str) -> Vec<EventConversion> {
|
fn mock_help_message(prefix: &str) -> Vec<EventConversion> {
|
||||||
|
|
@ -5364,6 +5391,37 @@ fn mock_unknown_sequence(prefix: &str) -> Vec<EventConversion> {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn mock_permission_request(prefix: &str) -> Vec<EventConversion> {
|
||||||
|
let permission_id = format!("{prefix}_permission");
|
||||||
|
let metadata = json!({
|
||||||
|
"codexRequestKind": "commandExecution",
|
||||||
|
"command": "ls"
|
||||||
|
});
|
||||||
|
vec![EventConversion::new(
|
||||||
|
UniversalEventType::PermissionRequested,
|
||||||
|
UniversalEventData::Permission(PermissionEventData {
|
||||||
|
permission_id,
|
||||||
|
action: "command_execution".to_string(),
|
||||||
|
status: PermissionStatus::Requested,
|
||||||
|
metadata: Some(metadata),
|
||||||
|
}),
|
||||||
|
)]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mock_question_request(prefix: &str) -> Vec<EventConversion> {
|
||||||
|
let question_id = format!("{prefix}_question");
|
||||||
|
vec![EventConversion::new(
|
||||||
|
UniversalEventType::QuestionRequested,
|
||||||
|
UniversalEventData::Question(QuestionEventData {
|
||||||
|
question_id,
|
||||||
|
prompt: "Proceed?".to_string(),
|
||||||
|
options: vec!["Yes".to_string(), "No".to_string()],
|
||||||
|
response: None,
|
||||||
|
status: QuestionStatus::Requested,
|
||||||
|
}),
|
||||||
|
)]
|
||||||
|
}
|
||||||
|
|
||||||
fn mock_permission_requests(prefix: &str) -> Vec<EventConversion> {
|
fn mock_permission_requests(prefix: &str) -> Vec<EventConversion> {
|
||||||
let permission_id = format!("{prefix}_permission");
|
let permission_id = format!("{prefix}_permission");
|
||||||
let permission_deny_id = format!("{prefix}_permission_denied");
|
let permission_deny_id = format!("{prefix}_permission_denied");
|
||||||
|
|
|
||||||
|
|
@ -208,65 +208,41 @@ async fn send_message(app: &Router, session_id: &str) {
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
|
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
|
||||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "poll events");
|
|
||||||
let new_events = payload
|
|
||||||
.get("events")
|
|
||||||
.and_then(Value::as_array)
|
|
||||||
.cloned()
|
|
||||||
.unwrap_or_default();
|
|
||||||
let new_offset = new_events
|
|
||||||
.last()
|
|
||||||
.and_then(|event| event.get("sequence"))
|
|
||||||
.and_then(Value::as_u64)
|
|
||||||
.unwrap_or(offset);
|
|
||||||
(new_events, new_offset)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let mut offset = 0u64;
|
let mut offset = 0u64;
|
||||||
loop {
|
let mut events = Vec::new();
|
||||||
if start.elapsed() >= timeout {
|
while start.elapsed() < timeout {
|
||||||
break;
|
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||||
}
|
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||||
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
|
assert_eq!(status, StatusCode::OK, "poll events");
|
||||||
if new_events.is_empty() {
|
let new_events = payload
|
||||||
if offset == 0 {
|
.get("events")
|
||||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
.and_then(Value::as_array)
|
||||||
continue;
|
.cloned()
|
||||||
|
.unwrap_or_default();
|
||||||
|
if !new_events.is_empty() {
|
||||||
|
if let Some(last) = new_events
|
||||||
|
.last()
|
||||||
|
.and_then(|event| event.get("sequence"))
|
||||||
|
.and_then(Value::as_u64)
|
||||||
|
{
|
||||||
|
offset = last;
|
||||||
|
}
|
||||||
|
events.extend(new_events);
|
||||||
|
if should_stop(&events) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
offset = new_offset;
|
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||||
}
|
}
|
||||||
offset
|
events
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn poll_events_until_from(
|
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||||
app: &Router,
|
|
||||||
session_id: &str,
|
|
||||||
offset: u64,
|
|
||||||
timeout: Duration,
|
|
||||||
) -> Vec<Value> {
|
|
||||||
poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
|
||||||
poll_events_until_from(app, session_id, 0, timeout).await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn read_sse_events_from(
|
|
||||||
app: &Router,
|
|
||||||
session_id: &str,
|
|
||||||
offset: u64,
|
|
||||||
timeout: Duration,
|
|
||||||
) -> Vec<Value> {
|
|
||||||
let request = Request::builder()
|
let request = Request::builder()
|
||||||
.method(Method::GET)
|
.method(Method::GET)
|
||||||
.uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
|
.uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
|
||||||
.body(Body::empty())
|
.body(Body::empty())
|
||||||
.expect("sse request");
|
.expect("sse request");
|
||||||
let response = app
|
let response = app
|
||||||
|
|
@ -307,10 +283,6 @@ async fn read_sse_events_from(
|
||||||
events
|
events
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
|
||||||
read_sse_events_from(app, session_id, 0, timeout).await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn read_turn_stream_events(
|
async fn read_turn_stream_events(
|
||||||
app: &Router,
|
app: &Router,
|
||||||
session_id: &str,
|
session_id: &str,
|
||||||
|
|
@ -834,33 +806,6 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async fn poll_events_until_match_from<F>(
|
|
||||||
app: &Router,
|
|
||||||
session_id: &str,
|
|
||||||
offset: u64,
|
|
||||||
timeout: Duration,
|
|
||||||
stop: F,
|
|
||||||
) -> Vec<Value>
|
|
||||||
where
|
|
||||||
F: Fn(&[Value]) -> bool,
|
|
||||||
{
|
|
||||||
let start = Instant::now();
|
|
||||||
let mut offset = offset;
|
|
||||||
let mut events = Vec::new();
|
|
||||||
while start.elapsed() < timeout {
|
|
||||||
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
|
|
||||||
if !new_events.is_empty() {
|
|
||||||
offset = new_offset;
|
|
||||||
events.extend(new_events);
|
|
||||||
if stop(&events) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
|
||||||
}
|
|
||||||
events
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn poll_events_until_match<F>(
|
async fn poll_events_until_match<F>(
|
||||||
app: &Router,
|
app: &Router,
|
||||||
session_id: &str,
|
session_id: &str,
|
||||||
|
|
@ -870,7 +815,34 @@ async fn poll_events_until_match<F>(
|
||||||
where
|
where
|
||||||
F: Fn(&[Value]) -> bool,
|
F: Fn(&[Value]) -> bool,
|
||||||
{
|
{
|
||||||
poll_events_until_match_from(app, session_id, 0, timeout, stop).await
|
let start = Instant::now();
|
||||||
|
let mut offset = 0u64;
|
||||||
|
let mut events = Vec::new();
|
||||||
|
while start.elapsed() < timeout {
|
||||||
|
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||||
|
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "poll events");
|
||||||
|
let new_events = payload
|
||||||
|
.get("events")
|
||||||
|
.and_then(Value::as_array)
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_default();
|
||||||
|
if !new_events.is_empty() {
|
||||||
|
if let Some(last) = new_events
|
||||||
|
.last()
|
||||||
|
.and_then(|event| event.get("sequence"))
|
||||||
|
.and_then(Value::as_u64)
|
||||||
|
{
|
||||||
|
offset = last;
|
||||||
|
}
|
||||||
|
events.extend(new_events);
|
||||||
|
if stop(&events) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||||
|
}
|
||||||
|
events
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_permission_id(events: &[Value]) -> Option<String> {
|
fn find_permission_id(events: &[Value]) -> Option<String> {
|
||||||
|
|
@ -917,10 +889,9 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
|
|
||||||
let session_id = format!("session-{}", config.agent.as_str());
|
let session_id = format!("session-{}", config.agent.as_str());
|
||||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||||
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
|
|
||||||
send_message(app, &session_id).await;
|
send_message(app, &session_id).await;
|
||||||
|
|
||||||
let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
|
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
|
||||||
let events = truncate_after_first_stop(&events);
|
let events = truncate_after_first_stop(&events);
|
||||||
assert!(
|
assert!(
|
||||||
!events.is_empty(),
|
!events.is_empty(),
|
||||||
|
|
@ -947,14 +918,12 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
|
|
||||||
let session_id = format!("sse-{}", config.agent.as_str());
|
let session_id = format!("sse-{}", config.agent.as_str());
|
||||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||||
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
|
|
||||||
|
|
||||||
let sse_task = {
|
let sse_task = {
|
||||||
let app = app.clone();
|
let app = app.clone();
|
||||||
let session_id = session_id.clone();
|
let session_id = session_id.clone();
|
||||||
let offset = offset;
|
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
|
read_sse_events(&app, &session_id, Duration::from_secs(120)).await
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs
|
||||||
|
expression: snapshot_status(status)
|
||||||
|
---
|
||||||
|
status: 204
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/http/agent_endpoints.rs
|
||||||
|
expression: normalize_agent_modes(&modes)
|
||||||
|
---
|
||||||
|
modes:
|
||||||
|
- description: true
|
||||||
|
id: build
|
||||||
|
name: Build
|
||||||
|
- description: true
|
||||||
|
id: plan
|
||||||
|
name: Plan
|
||||||
|
|
@ -32,7 +32,6 @@ async fn permission_flow_snapshots() {
|
||||||
|
|
||||||
let permission_session = format!("perm-{}", config.agent.as_str());
|
let permission_session = format!("perm-{}", config.agent.as_str());
|
||||||
create_session(&app.app, config.agent, &permission_session, "plan").await;
|
create_session(&app.app, config.agent, &permission_session, "plan").await;
|
||||||
let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await;
|
|
||||||
let status = send_status(
|
let status = send_status(
|
||||||
&app.app,
|
&app.app,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -42,10 +41,9 @@ async fn permission_flow_snapshots() {
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
|
||||||
|
|
||||||
let permission_events = poll_events_until_match_from(
|
let permission_events = poll_events_until_match(
|
||||||
&app.app,
|
&app.app,
|
||||||
&permission_session,
|
&permission_session,
|
||||||
offset,
|
|
||||||
Duration::from_secs(120),
|
Duration::from_secs(120),
|
||||||
|events| find_permission_id(events).is_some() || should_stop(events),
|
|events| find_permission_id(events).is_some() || should_stop(events),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,6 @@ async fn question_flow_snapshots() {
|
||||||
|
|
||||||
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
|
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
|
||||||
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
|
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
|
||||||
let reply_offset =
|
|
||||||
drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await;
|
|
||||||
let status = send_status(
|
let status = send_status(
|
||||||
&app.app,
|
&app.app,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -43,10 +41,9 @@ async fn question_flow_snapshots() {
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
|
||||||
|
|
||||||
let question_events = poll_events_until_match_from(
|
let question_events = poll_events_until_match(
|
||||||
&app.app,
|
&app.app,
|
||||||
&question_reply_session,
|
&question_reply_session,
|
||||||
reply_offset,
|
|
||||||
Duration::from_secs(120),
|
Duration::from_secs(120),
|
||||||
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
||||||
)
|
)
|
||||||
|
|
@ -88,8 +85,6 @@ async fn question_flow_snapshots() {
|
||||||
|
|
||||||
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
|
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
|
||||||
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
|
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
|
||||||
let reject_offset =
|
|
||||||
drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await;
|
|
||||||
let status = send_status(
|
let status = send_status(
|
||||||
&app.app,
|
&app.app,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -99,10 +94,9 @@ async fn question_flow_snapshots() {
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
|
||||||
|
|
||||||
let reject_events = poll_events_until_match_from(
|
let reject_events = poll_events_until_match(
|
||||||
&app.app,
|
&app.app,
|
||||||
&question_reject_session,
|
&question_reject_session,
|
||||||
reject_offset,
|
|
||||||
Duration::from_secs(120),
|
Duration::from_secs(120),
|
||||||
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,8 @@
|
||||||
// Reasoning capability checks are isolated from baseline snapshots.
|
// Reasoning capability checks are isolated from baseline snapshots.
|
||||||
include!("../common/http.rs");
|
include!("../common/http.rs");
|
||||||
|
|
||||||
fn reasoning_prompt(agent: AgentId) -> &'static str {
|
fn reasoning_prompt(_agent: AgentId) -> &'static str {
|
||||||
if agent == AgentId::Mock {
|
"Answer briefly and include your reasoning."
|
||||||
"demo"
|
|
||||||
} else {
|
|
||||||
"Answer briefly and include your reasoning."
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
|
@ -29,7 +25,6 @@ async fn reasoning_events_present() {
|
||||||
let session_id = format!("reasoning-{}", config.agent.as_str());
|
let session_id = format!("reasoning-{}", config.agent.as_str());
|
||||||
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
||||||
.await;
|
.await;
|
||||||
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
|
|
||||||
let status = send_status(
|
let status = send_status(
|
||||||
&app.app,
|
&app.app,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -39,10 +34,9 @@ async fn reasoning_events_present() {
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");
|
||||||
|
|
||||||
let events = poll_events_until_match_from(
|
let events = poll_events_until_match(
|
||||||
&app.app,
|
&app.app,
|
||||||
&session_id,
|
&session_id,
|
||||||
offset,
|
|
||||||
Duration::from_secs(120),
|
Duration::from_secs(120),
|
||||||
|events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event),
|
|events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -146,8 +146,6 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
let perm_mode = test_permission_mode(config.agent);
|
let perm_mode = test_permission_mode(config.agent);
|
||||||
create_session(app, config.agent, &session_a, perm_mode).await;
|
create_session(app, config.agent, &session_a, perm_mode).await;
|
||||||
create_session(app, config.agent, &session_b, perm_mode).await;
|
create_session(app, config.agent, &session_b, perm_mode).await;
|
||||||
let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
|
|
||||||
let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;
|
|
||||||
|
|
||||||
let app_a = app.clone();
|
let app_a = app.clone();
|
||||||
let app_b = app.clone();
|
let app_b = app.clone();
|
||||||
|
|
@ -157,8 +155,8 @@ async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
|
|
||||||
let app_a = app.clone();
|
let app_a = app.clone();
|
||||||
let app_b = app.clone();
|
let app_b = app.clone();
|
||||||
let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120));
|
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
|
||||||
let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120));
|
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
|
||||||
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
|
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
|
||||||
let events_a = truncate_after_first_stop(&events_a);
|
let events_a = truncate_after_first_stop(&events_a);
|
||||||
let events_b = truncate_after_first_stop(&events_b);
|
let events_b = truncate_after_first_stop(&events_b);
|
||||||
|
|
|
||||||
|
|
@ -2,19 +2,27 @@
|
||||||
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
||||||
expression: value
|
expression: value
|
||||||
---
|
---
|
||||||
- item:
|
- metadata: true
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 1
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -22,27 +30,11 @@ expression: value
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
seq: 5
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
- permission:
|
||||||
|
action: command_execution
|
||||||
|
id: "<redacted>"
|
||||||
|
status: requested
|
||||||
|
seq: 6
|
||||||
|
type: permission.requested
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
status: 204
|
||||||
|
|
@ -2,19 +2,27 @@
|
||||||
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
expression: value
|
expression: value
|
||||||
---
|
---
|
||||||
- item:
|
- metadata: true
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 1
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -22,27 +30,11 @@ expression: value
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
seq: 5
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
- question:
|
||||||
|
id: "<redacted>"
|
||||||
|
options: 2
|
||||||
|
status: requested
|
||||||
|
seq: 6
|
||||||
|
type: question.requested
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
status: 204
|
||||||
|
|
@ -2,19 +2,27 @@
|
||||||
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
expression: value
|
expression: value
|
||||||
---
|
---
|
||||||
- item:
|
- metadata: true
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 1
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -22,27 +30,11 @@ expression: value
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
seq: 5
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
- question:
|
||||||
|
id: "<redacted>"
|
||||||
|
options: 2
|
||||||
|
status: requested
|
||||||
|
seq: 6
|
||||||
|
type: question.requested
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
status: 204
|
||||||
|
|
@ -3,19 +3,27 @@ source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
|
||||||
expression: value
|
expression: value
|
||||||
---
|
---
|
||||||
session_a:
|
session_a:
|
||||||
|
- metadata: true
|
||||||
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
- text
|
- text
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 1
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -23,7 +31,7 @@ session_a:
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
seq: 5
|
||||||
type: item.completed
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -31,13 +39,13 @@ session_a:
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 4
|
seq: 6
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 5
|
seq: 7
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -45,22 +53,30 @@ session_a:
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 6
|
seq: 8
|
||||||
type: item.completed
|
type: item.completed
|
||||||
session_b:
|
session_b:
|
||||||
- item:
|
- metadata: true
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 1
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -68,7 +84,7 @@ session_b:
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
seq: 5
|
||||||
type: item.completed
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -76,13 +92,13 @@ session_b:
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 4
|
seq: 6
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 5
|
seq: 7
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -90,5 +106,5 @@ session_b:
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 6
|
seq: 8
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
|
|
||||||
|
|
@ -2,19 +2,27 @@
|
||||||
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
||||||
expression: normalized
|
expression: normalized
|
||||||
---
|
---
|
||||||
- item:
|
- metadata: true
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 1
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -22,7 +30,7 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
seq: 5
|
||||||
type: item.completed
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -30,13 +38,13 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 4
|
seq: 6
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 5
|
seq: 7
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -44,5 +52,5 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 6
|
seq: 8
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
|
|
||||||
|
|
@ -2,19 +2,27 @@
|
||||||
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
||||||
expression: normalized
|
expression: normalized
|
||||||
---
|
---
|
||||||
- item:
|
- metadata: true
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 1
|
seq: 1
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- metadata: true
|
||||||
|
seq: 2
|
||||||
|
session: started
|
||||||
|
type: session.started
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 3
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 4
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -22,7 +30,7 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: user
|
role: user
|
||||||
status: completed
|
status: completed
|
||||||
seq: 3
|
seq: 5
|
||||||
type: item.completed
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -30,13 +38,13 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 4
|
seq: 6
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 5
|
seq: 7
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -44,5 +52,5 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 6
|
seq: 8
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,8 @@
|
||||||
// Status capability checks are isolated from baseline snapshots.
|
// Status capability checks are isolated from baseline snapshots.
|
||||||
include!("../common/http.rs");
|
include!("../common/http.rs");
|
||||||
|
|
||||||
fn status_prompt(agent: AgentId) -> &'static str {
|
fn status_prompt(_agent: AgentId) -> &'static str {
|
||||||
if agent == AgentId::Mock {
|
"Provide a short status update."
|
||||||
"status"
|
|
||||||
} else {
|
|
||||||
"Provide a short status update."
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn events_have_status(events: &[Value]) -> bool {
|
fn events_have_status(events: &[Value]) -> bool {
|
||||||
|
|
@ -34,7 +30,6 @@ async fn status_events_present() {
|
||||||
let session_id = format!("status-{}", config.agent.as_str());
|
let session_id = format!("status-{}", config.agent.as_str());
|
||||||
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
||||||
.await;
|
.await;
|
||||||
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
|
|
||||||
let status = send_status(
|
let status = send_status(
|
||||||
&app.app,
|
&app.app,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -44,10 +39,9 @@ async fn status_events_present() {
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");
|
||||||
|
|
||||||
let events = poll_events_until_match_from(
|
let events = poll_events_until_match(
|
||||||
&app.app,
|
&app.app,
|
||||||
&session_id,
|
&session_id,
|
||||||
offset,
|
|
||||||
Duration::from_secs(120),
|
Duration::from_secs(120),
|
||||||
|events| events_have_status(events) || events.iter().any(is_error_event),
|
|events| events_have_status(events) || events.iter().any(is_error_event),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue