mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 08:03:46 +00:00
feat: add structured stderr output for error diagnostics (#29)
Add StderrOutput schema with head/tail/truncated/total_lines fields to provide better error diagnostics when agent processes fail.
This commit is contained in:
parent
82ac0b3880
commit
c7d6482fd4
9 changed files with 247 additions and 3 deletions
|
|
@ -1465,9 +1465,28 @@
|
|||
"terminated_by"
|
||||
],
|
||||
"properties": {
|
||||
"exit_code": {
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"description": "Process exit code when reason is Error",
|
||||
"nullable": true
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "Error message when reason is Error",
|
||||
"nullable": true
|
||||
},
|
||||
"reason": {
|
||||
"$ref": "#/components/schemas/SessionEndReason"
|
||||
},
|
||||
"stderr": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/StderrOutput"
|
||||
}
|
||||
],
|
||||
"nullable": true
|
||||
},
|
||||
"terminated_by": {
|
||||
"$ref": "#/components/schemas/TerminatedBy"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ Every event from the API is wrapped in a `UniversalEvent` envelope.
|
|||
| Type | Description | Data |
|
||||
|------|-------------|------|
|
||||
| `session.started` | Session has started | `{ metadata?: any }` |
|
||||
| `session.ended` | Session has ended | `{ reason, terminated_by }` |
|
||||
| `session.ended` | Session has ended | `{ reason, terminated_by, message?, exit_code?, stderr? }` |
|
||||
|
||||
**SessionEndedData**
|
||||
|
||||
|
|
@ -54,6 +54,18 @@ Every event from the API is wrapped in a `UniversalEvent` envelope.
|
|||
|-------|------|--------|
|
||||
| `reason` | string | `completed`, `error`, `terminated` |
|
||||
| `terminated_by` | string | `agent`, `daemon` |
|
||||
| `message` | string? | Error message (only present when reason is `error`) |
|
||||
| `exit_code` | int? | Process exit code (only present when reason is `error`) |
|
||||
| `stderr` | StderrOutput? | Structured stderr output (only present when reason is `error`) |
|
||||
|
||||
**StderrOutput**
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `head` | string? | First 20 lines of stderr (if truncated) or full stderr (if not truncated) |
|
||||
| `tail` | string? | Last 50 lines of stderr (only present if truncated) |
|
||||
| `truncated` | boolean | Whether the output was truncated |
|
||||
| `total_lines` | int? | Total number of lines in stderr |
|
||||
|
||||
### Item Lifecycle
|
||||
|
||||
|
|
|
|||
|
|
@ -257,7 +257,15 @@ export interface components {
|
|||
/** @enum {string} */
|
||||
SessionEndReason: "completed" | "error" | "terminated";
|
||||
SessionEndedData: {
|
||||
/**
|
||||
* Format: int32
|
||||
* @description Process exit code when reason is Error
|
||||
*/
|
||||
exit_code?: number | null;
|
||||
/** @description Error message when reason is Error */
|
||||
message?: string | null;
|
||||
reason: components["schemas"]["SessionEndReason"];
|
||||
stderr?: components["schemas"]["StderrOutput"] | null;
|
||||
terminated_by: components["schemas"]["TerminatedBy"];
|
||||
};
|
||||
SessionInfo: {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,15 @@
|
|||
use std::fs::OpenOptions;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use sandbox_agent_error::SandboxError;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use sandbox_agent_universal_agent_schema::StderrOutput;
|
||||
|
||||
const LOG_RETENTION_DAYS: i64 = 7;
|
||||
const LOG_HEAD_LINES: usize = 20;
|
||||
const LOG_TAIL_LINES: usize = 50;
|
||||
const LOG_MAX_LINE_LENGTH: usize = 500;
|
||||
|
||||
pub struct AgentServerLogs {
|
||||
base_dir: PathBuf,
|
||||
|
|
@ -76,4 +81,70 @@ impl AgentServerLogs {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read stderr from the current log file for error diagnostics.
|
||||
/// Returns structured output with head/tail if truncated.
|
||||
pub fn read_stderr(&self) -> Option<StderrOutput> {
|
||||
let log_dir = self.base_dir.join(&self.agent);
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let file_name = format!(
|
||||
"{}-{:04}-{:02}-{:02}.log",
|
||||
self.agent,
|
||||
now.year(),
|
||||
now.month() as u8,
|
||||
now.day()
|
||||
);
|
||||
let path = log_dir.join(file_name);
|
||||
|
||||
let file = File::open(&path).ok()?;
|
||||
let metadata = file.metadata().ok()?;
|
||||
let file_size = metadata.len();
|
||||
|
||||
if file_size == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let reader = BufReader::new(file);
|
||||
let mut all_lines: Vec<String> = Vec::new();
|
||||
|
||||
for line_result in reader.lines() {
|
||||
let line: String = match line_result {
|
||||
Ok(l) => l,
|
||||
Err(_) => break,
|
||||
};
|
||||
let truncated_line = if line.len() > LOG_MAX_LINE_LENGTH {
|
||||
format!("{}...", &line[..LOG_MAX_LINE_LENGTH])
|
||||
} else {
|
||||
line
|
||||
};
|
||||
all_lines.push(truncated_line);
|
||||
}
|
||||
|
||||
let line_count = all_lines.len();
|
||||
if line_count == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let max_untruncated = LOG_HEAD_LINES + LOG_TAIL_LINES;
|
||||
|
||||
if line_count <= max_untruncated {
|
||||
// Small file - return all content in head
|
||||
Some(StderrOutput {
|
||||
head: Some(all_lines.join("\n")),
|
||||
tail: None,
|
||||
truncated: false,
|
||||
total_lines: Some(line_count),
|
||||
})
|
||||
} else {
|
||||
// Large file - return head and tail separately
|
||||
let head = all_lines[..LOG_HEAD_LINES].join("\n");
|
||||
let tail = all_lines[line_count - LOG_TAIL_LINES..].join("\n");
|
||||
Some(StderrOutput {
|
||||
head: Some(head),
|
||||
tail: Some(tail),
|
||||
truncated: true,
|
||||
total_lines: Some(line_count),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,8 @@ use sandbox_agent_universal_agent_schema::{
|
|||
AgentUnparsedData, ContentPart, ErrorData, EventConversion, EventSource, FileAction,
|
||||
ItemDeltaData, ItemEventData, ItemKind, ItemRole, ItemStatus, PermissionEventData,
|
||||
PermissionStatus, QuestionEventData, QuestionStatus, ReasoningVisibility, SessionEndReason,
|
||||
SessionEndedData, SessionStartedData, TerminatedBy, UniversalEvent, UniversalEventData,
|
||||
SessionEndedData, SessionStartedData, StderrOutput, TerminatedBy, UniversalEvent,
|
||||
UniversalEventData,
|
||||
UniversalEventType, UniversalItem,
|
||||
};
|
||||
use schemars::JsonSchema;
|
||||
|
|
@ -1377,6 +1378,7 @@ impl AgentServerManager {
|
|||
|
||||
let owner = { self.owner.lock().expect("owner lock").clone() };
|
||||
if let Some(owner) = owner.and_then(|weak| weak.upgrade()) {
|
||||
let logs = owner.read_agent_stderr(agent);
|
||||
for session_id in session_ids {
|
||||
owner
|
||||
.record_error(
|
||||
|
|
@ -1393,6 +1395,7 @@ impl AgentServerManager {
|
|||
message,
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs.clone(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
|
@ -1442,6 +1445,15 @@ impl SessionManager {
|
|||
sessions.iter_mut().find(|session| session.session_id == session_id)
|
||||
}
|
||||
|
||||
/// Read agent stderr for error diagnostics
|
||||
fn read_agent_stderr(&self, agent: AgentId) -> Option<StderrOutput> {
|
||||
let logs = AgentServerLogs::new(
|
||||
self.server_manager.log_base_dir.clone(),
|
||||
agent.as_str(),
|
||||
);
|
||||
logs.read_stderr()
|
||||
}
|
||||
|
||||
async fn create_session(
|
||||
self: &Arc<Self>,
|
||||
session_id: String,
|
||||
|
|
@ -1697,6 +1709,9 @@ impl SessionManager {
|
|||
UniversalEventData::SessionEnded(SessionEndedData {
|
||||
reason: SessionEndReason::Terminated,
|
||||
terminated_by: TerminatedBy::Daemon,
|
||||
message: None,
|
||||
exit_code: None,
|
||||
stderr: None,
|
||||
}),
|
||||
)
|
||||
.synthetic()
|
||||
|
|
@ -2232,6 +2247,7 @@ impl SessionManager {
|
|||
&message,
|
||||
SessionEndReason::Completed,
|
||||
TerminatedBy::Agent,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
|
@ -2247,12 +2263,14 @@ impl SessionManager {
|
|||
)
|
||||
.await;
|
||||
}
|
||||
let logs = self.read_agent_stderr(agent);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
status.code(),
|
||||
&message,
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Agent,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
|
@ -2267,12 +2285,14 @@ impl SessionManager {
|
|||
)
|
||||
.await;
|
||||
}
|
||||
let logs = self.read_agent_stderr(agent);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
None,
|
||||
&message,
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
|
@ -2287,12 +2307,14 @@ impl SessionManager {
|
|||
)
|
||||
.await;
|
||||
}
|
||||
let logs = self.read_agent_stderr(agent);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
None,
|
||||
&message,
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
|
@ -2419,6 +2441,7 @@ impl SessionManager {
|
|||
message: &str,
|
||||
reason: SessionEndReason,
|
||||
terminated_by: TerminatedBy,
|
||||
stderr: Option<StderrOutput>,
|
||||
) {
|
||||
let mut sessions = self.sessions.lock().await;
|
||||
if let Some(session) = Self::session_mut(&mut sessions, session_id) {
|
||||
|
|
@ -2431,11 +2454,20 @@ impl SessionManager {
|
|||
reason.clone(),
|
||||
terminated_by.clone(),
|
||||
);
|
||||
let (error_message, error_exit_code, error_stderr) =
|
||||
if reason == SessionEndReason::Error {
|
||||
(Some(message.to_string()), exit_code, stderr)
|
||||
} else {
|
||||
(None, None, None)
|
||||
};
|
||||
let ended = EventConversion::new(
|
||||
UniversalEventType::SessionEnded,
|
||||
UniversalEventData::SessionEnded(SessionEndedData {
|
||||
reason,
|
||||
terminated_by,
|
||||
message: error_message,
|
||||
exit_code: error_exit_code,
|
||||
stderr: error_stderr,
|
||||
}),
|
||||
)
|
||||
.synthetic()
|
||||
|
|
@ -2493,12 +2525,14 @@ impl SessionManager {
|
|||
None,
|
||||
)
|
||||
.await;
|
||||
let logs = self.read_agent_stderr(AgentId::Opencode);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
None,
|
||||
"opencode server unavailable",
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
|
|
@ -2516,12 +2550,14 @@ impl SessionManager {
|
|||
None,
|
||||
)
|
||||
.await;
|
||||
let logs = self.read_agent_stderr(AgentId::Opencode);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
None,
|
||||
"opencode sse connection failed",
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
|
|
@ -2538,12 +2574,14 @@ impl SessionManager {
|
|||
None,
|
||||
)
|
||||
.await;
|
||||
let logs = self.read_agent_stderr(AgentId::Opencode);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
None,
|
||||
"opencode sse error",
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
|
|
@ -2562,12 +2600,14 @@ impl SessionManager {
|
|||
None,
|
||||
)
|
||||
.await;
|
||||
let logs = self.read_agent_stderr(AgentId::Opencode);
|
||||
self.mark_session_ended(
|
||||
&session_id,
|
||||
None,
|
||||
"opencode sse stream error",
|
||||
SessionEndReason::Error,
|
||||
TerminatedBy::Daemon,
|
||||
logs,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
|
|
@ -5804,6 +5844,9 @@ fn mock_session_end_sequence(_prefix: &str) -> Vec<EventConversion> {
|
|||
UniversalEventData::SessionEnded(SessionEndedData {
|
||||
reason: SessionEndReason::Completed,
|
||||
terminated_by: TerminatedBy::Agent,
|
||||
message: None,
|
||||
exit_code: None,
|
||||
stderr: None,
|
||||
}),
|
||||
)
|
||||
.synthetic()]
|
||||
|
|
|
|||
|
|
@ -109,6 +109,9 @@ pub fn event_to_universal(event: &schema::StreamJsonMessage) -> Result<Vec<Event
|
|||
UniversalEventData::SessionEnded(SessionEndedData {
|
||||
reason: SessionEndReason::Completed,
|
||||
terminated_by: TerminatedBy::Agent,
|
||||
message: None,
|
||||
exit_code: None,
|
||||
stderr: None,
|
||||
}),
|
||||
)
|
||||
.with_raw(serde_json::to_value(event).ok()),
|
||||
|
|
|
|||
|
|
@ -521,6 +521,9 @@ pub fn session_ended_event(thread_id: &str, reason: SessionEndReason) -> EventCo
|
|||
UniversalEventData::SessionEnded(SessionEndedData {
|
||||
reason,
|
||||
terminated_by: TerminatedBy::Agent,
|
||||
message: None,
|
||||
exit_code: None,
|
||||
stderr: None,
|
||||
}),
|
||||
)
|
||||
.with_native_session(Some(thread_id.to_string()))
|
||||
|
|
|
|||
|
|
@ -79,9 +79,33 @@ pub struct SessionStartedData {
|
|||
pub struct SessionEndedData {
|
||||
pub reason: SessionEndReason,
|
||||
pub terminated_by: TerminatedBy,
|
||||
/// Error message when reason is Error
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub message: Option<String>,
|
||||
/// Process exit code when reason is Error
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub exit_code: Option<i32>,
|
||||
/// Agent stderr output when reason is Error
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub stderr: Option<StderrOutput>,
|
||||
}
|
||||
|
||||
// Structured stderr capture carried on `SessionEndedData::stderr`.
// NOTE(review): the field doc comments below match, word for word, the
// `description` values of the `StderrOutput` entry in the JSON schema;
// presumably the schema is generated from them, so regenerate the schema
// artifacts after rewording any of these comments.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
|
||||
pub struct StderrOutput {
|
||||
/// First N lines of stderr (if truncated) or full stderr (if not truncated)
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub head: Option<String>,
|
||||
/// Last N lines of stderr (only present if truncated)
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub tail: Option<String>,
|
||||
/// Whether the output was truncated
|
||||
pub truncated: bool,
|
||||
/// Total number of lines in stderr
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub total_lines: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SessionEndReason {
|
||||
Completed,
|
||||
|
|
|
|||
|
|
@ -436,9 +436,35 @@
|
|||
"terminated_by"
|
||||
],
|
||||
"properties": {
|
||||
"exit_code": {
|
||||
"description": "Process exit code when reason is Error",
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
],
|
||||
"format": "int32"
|
||||
},
|
||||
"message": {
|
||||
"description": "Error message when reason is Error",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"reason": {
|
||||
"$ref": "#/definitions/SessionEndReason"
|
||||
},
|
||||
"stderr": {
|
||||
"description": "Agent stderr output when reason is Error",
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/definitions/StderrOutput"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"terminated_by": {
|
||||
"$ref": "#/definitions/TerminatedBy"
|
||||
}
|
||||
|
|
@ -450,6 +476,41 @@
|
|||
"metadata": true
|
||||
}
|
||||
},
|
||||
"StderrOutput": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"truncated"
|
||||
],
|
||||
"properties": {
|
||||
"head": {
|
||||
"description": "First N lines of stderr (if truncated) or full stderr (if not truncated)",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"tail": {
|
||||
"description": "Last N lines of stderr (only present if truncated)",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"total_lines": {
|
||||
"description": "Total number of lines in stderr",
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
],
|
||||
"format": "uint",
|
||||
"minimum": 0.0
|
||||
},
|
||||
"truncated": {
|
||||
"description": "Whether the output was truncated",
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"TerminatedBy": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue