feat: add structured stderr output for error diagnostics (#29)

Add StderrOutput schema with head/tail/truncated/total_lines fields to
provide better error diagnostics when agent processes fail.
This commit is contained in:
Nathan Flurry 2026-01-29 07:18:56 -08:00 committed by GitHub
parent 82ac0b3880
commit c7d6482fd4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 247 additions and 3 deletions

View file

@ -1465,9 +1465,28 @@
"terminated_by" "terminated_by"
], ],
"properties": { "properties": {
"exit_code": {
"type": "integer",
"format": "int32",
"description": "Process exit code when reason is Error",
"nullable": true
},
"message": {
"type": "string",
"description": "Error message when reason is Error",
"nullable": true
},
"reason": { "reason": {
"$ref": "#/components/schemas/SessionEndReason" "$ref": "#/components/schemas/SessionEndReason"
}, },
"stderr": {
"allOf": [
{
"$ref": "#/components/schemas/StderrOutput"
}
],
"nullable": true
},
"terminated_by": { "terminated_by": {
"$ref": "#/components/schemas/TerminatedBy" "$ref": "#/components/schemas/TerminatedBy"
} }

View file

@ -46,7 +46,7 @@ Every event from the API is wrapped in a `UniversalEvent` envelope.
| Type | Description | Data | | Type | Description | Data |
|------|-------------|------| |------|-------------|------|
| `session.started` | Session has started | `{ metadata?: any }` | | `session.started` | Session has started | `{ metadata?: any }` |
| `session.ended` | Session has ended | `{ reason, terminated_by }` | | `session.ended` | Session has ended | `{ reason, terminated_by, message?, exit_code?, stderr? }` |
**SessionEndedData** **SessionEndedData**
@ -54,6 +54,18 @@ Every event from the API is wrapped in a `UniversalEvent` envelope.
|-------|------|--------| |-------|------|--------|
| `reason` | string | `completed`, `error`, `terminated` | | `reason` | string | `completed`, `error`, `terminated` |
| `terminated_by` | string | `agent`, `daemon` | | `terminated_by` | string | `agent`, `daemon` |
| `message` | string? | Error message (only present when reason is `error`) |
| `exit_code` | int? | Process exit code (only present when reason is `error`) |
| `stderr` | StderrOutput? | Structured stderr output (only present when reason is `error`) |
**StderrOutput**
| Field | Type | Description |
|-------|------|-------------|
| `head` | string? | First 20 lines of stderr (if truncated) or full stderr (if not truncated) |
| `tail` | string? | Last 50 lines of stderr (only present if truncated) |
| `truncated` | boolean | Whether the output was truncated |
| `total_lines` | int? | Total number of lines in stderr |
### Item Lifecycle ### Item Lifecycle

View file

@ -257,7 +257,15 @@ export interface components {
/** @enum {string} */ /** @enum {string} */
SessionEndReason: "completed" | "error" | "terminated"; SessionEndReason: "completed" | "error" | "terminated";
SessionEndedData: { SessionEndedData: {
/**
* Format: int32
* @description Process exit code when reason is Error
*/
exit_code?: number | null;
/** @description Error message when reason is Error */
message?: string | null;
reason: components["schemas"]["SessionEndReason"]; reason: components["schemas"]["SessionEndReason"];
stderr?: components["schemas"]["StderrOutput"] | null;
terminated_by: components["schemas"]["TerminatedBy"]; terminated_by: components["schemas"]["TerminatedBy"];
}; };
SessionInfo: { SessionInfo: {

View file

@ -1,10 +1,15 @@
use std::fs::OpenOptions; use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use sandbox_agent_error::SandboxError; use sandbox_agent_error::SandboxError;
use time::{Duration, OffsetDateTime}; use time::{Duration, OffsetDateTime};
use sandbox_agent_universal_agent_schema::StderrOutput;
const LOG_RETENTION_DAYS: i64 = 7; const LOG_RETENTION_DAYS: i64 = 7;
const LOG_HEAD_LINES: usize = 20;
const LOG_TAIL_LINES: usize = 50;
const LOG_MAX_LINE_LENGTH: usize = 500;
pub struct AgentServerLogs { pub struct AgentServerLogs {
base_dir: PathBuf, base_dir: PathBuf,
@ -76,4 +81,70 @@ impl AgentServerLogs {
Ok(()) Ok(())
} }
/// Read stderr from the current log file for error diagnostics.
///
/// Opens `<base_dir>/<agent>/<agent>-YYYY-MM-DD.log` for today's UTC date and
/// returns its content as a [`StderrOutput`]:
///
/// * up to `LOG_HEAD_LINES + LOG_TAIL_LINES` lines: everything is returned in
///   `head`, `tail` is `None` and `truncated` is `false`;
/// * more lines than that: `head` holds the first `LOG_HEAD_LINES` lines,
///   `tail` holds the last `LOG_TAIL_LINES` lines and `truncated` is `true`.
///
/// Each line is capped at `LOG_MAX_LINE_LENGTH` bytes (cut on a UTF-8
/// character boundary) with `...` appended when it was shortened.
///
/// Returns `None` when the file cannot be opened, its metadata cannot be
/// read, it is empty, or no line could be read from it.
///
/// NOTE(review): only today's (UTC) file is consulted; presumably a process
/// that started logging before midnight UTC has its output in the previous
/// day's file — confirm against the log writer.
pub fn read_stderr(&self) -> Option<StderrOutput> {
    // Shorten an over-long line without splitting a multi-byte character.
    // Slicing at a fixed byte offset panics when that offset is not a char
    // boundary, so back off to the nearest boundary at or below the limit.
    fn clip_line(mut line: String) -> String {
        if line.len() > LOG_MAX_LINE_LENGTH {
            let mut cut = LOG_MAX_LINE_LENGTH;
            // Offset 0 is always a boundary, so this loop terminates.
            while !line.is_char_boundary(cut) {
                cut -= 1;
            }
            line.truncate(cut);
            line.push_str("...");
        }
        line
    }

    let log_dir = self.base_dir.join(&self.agent);
    let now = OffsetDateTime::now_utc();
    let file_name = format!(
        "{}-{:04}-{:02}-{:02}.log",
        self.agent,
        now.year(),
        now.month() as u8,
        now.day()
    );
    let path = log_dir.join(file_name);

    let file = File::open(&path).ok()?;
    if file.metadata().ok()?.len() == 0 {
        return None;
    }

    // Keep only what can end up in the result: the first LOG_HEAD_LINES lines
    // and a sliding window of the most recent LOG_TAIL_LINES lines after them.
    // Memory use stays bounded no matter how large the log file is.
    let mut head_lines: Vec<String> = Vec::with_capacity(LOG_HEAD_LINES);
    let mut tail_lines: std::collections::VecDeque<String> =
        std::collections::VecDeque::with_capacity(LOG_TAIL_LINES + 1);
    let mut line_count: usize = 0;

    for line_result in BufReader::new(file).lines() {
        // Best effort: stop at the first unreadable line (I/O error or
        // invalid UTF-8) and report whatever was read up to that point.
        let line = match line_result {
            Ok(line) => clip_line(line),
            Err(_) => break,
        };
        line_count += 1;

        if head_lines.len() < LOG_HEAD_LINES {
            head_lines.push(line);
        } else {
            tail_lines.push_back(line);
            if tail_lines.len() > LOG_TAIL_LINES {
                tail_lines.pop_front();
            }
        }
    }

    if line_count == 0 {
        return None;
    }

    if line_count <= LOG_HEAD_LINES + LOG_TAIL_LINES {
        // Small file - nothing was evicted from the window, so head + tail
        // buffers together are the complete content; return it all in `head`.
        let every_line: Vec<String> = head_lines.into_iter().chain(tail_lines).collect();
        Some(StderrOutput {
            head: Some(every_line.join("\n")),
            tail: None,
            truncated: false,
            total_lines: Some(line_count),
        })
    } else {
        // Large file - return head and tail separately.
        let last_lines: Vec<String> = tail_lines.into_iter().collect();
        Some(StderrOutput {
            head: Some(head_lines.join("\n")),
            tail: Some(last_lines.join("\n")),
            truncated: true,
            total_lines: Some(line_count),
        })
    }
}
} }

View file

@ -24,7 +24,8 @@ use sandbox_agent_universal_agent_schema::{
AgentUnparsedData, ContentPart, ErrorData, EventConversion, EventSource, FileAction, AgentUnparsedData, ContentPart, ErrorData, EventConversion, EventSource, FileAction,
ItemDeltaData, ItemEventData, ItemKind, ItemRole, ItemStatus, PermissionEventData, ItemDeltaData, ItemEventData, ItemKind, ItemRole, ItemStatus, PermissionEventData,
PermissionStatus, QuestionEventData, QuestionStatus, ReasoningVisibility, SessionEndReason, PermissionStatus, QuestionEventData, QuestionStatus, ReasoningVisibility, SessionEndReason,
SessionEndedData, SessionStartedData, TerminatedBy, UniversalEvent, UniversalEventData, SessionEndedData, SessionStartedData, StderrOutput, TerminatedBy, UniversalEvent,
UniversalEventData,
UniversalEventType, UniversalItem, UniversalEventType, UniversalItem,
}; };
use schemars::JsonSchema; use schemars::JsonSchema;
@ -1377,6 +1378,7 @@ impl AgentServerManager {
let owner = { self.owner.lock().expect("owner lock").clone() }; let owner = { self.owner.lock().expect("owner lock").clone() };
if let Some(owner) = owner.and_then(|weak| weak.upgrade()) { if let Some(owner) = owner.and_then(|weak| weak.upgrade()) {
let logs = owner.read_agent_stderr(agent);
for session_id in session_ids { for session_id in session_ids {
owner owner
.record_error( .record_error(
@ -1393,6 +1395,7 @@ impl AgentServerManager {
message, message,
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs.clone(),
) )
.await; .await;
} }
@ -1442,6 +1445,15 @@ impl SessionManager {
sessions.iter_mut().find(|session| session.session_id == session_id) sessions.iter_mut().find(|session| session.session_id == session_id)
} }
/// Fetch the captured stderr of `agent` for inclusion in error diagnostics.
///
/// Builds an `AgentServerLogs` over the server manager's log base directory
/// for the given agent and returns whatever its `read_stderr` yields.
fn read_agent_stderr(&self, agent: AgentId) -> Option<StderrOutput> {
    let base_dir = self.server_manager.log_base_dir.clone();
    AgentServerLogs::new(base_dir, agent.as_str()).read_stderr()
}
async fn create_session( async fn create_session(
self: &Arc<Self>, self: &Arc<Self>,
session_id: String, session_id: String,
@ -1697,6 +1709,9 @@ impl SessionManager {
UniversalEventData::SessionEnded(SessionEndedData { UniversalEventData::SessionEnded(SessionEndedData {
reason: SessionEndReason::Terminated, reason: SessionEndReason::Terminated,
terminated_by: TerminatedBy::Daemon, terminated_by: TerminatedBy::Daemon,
message: None,
exit_code: None,
stderr: None,
}), }),
) )
.synthetic() .synthetic()
@ -2232,6 +2247,7 @@ impl SessionManager {
&message, &message,
SessionEndReason::Completed, SessionEndReason::Completed,
TerminatedBy::Agent, TerminatedBy::Agent,
None,
) )
.await; .await;
} }
@ -2247,12 +2263,14 @@ impl SessionManager {
) )
.await; .await;
} }
let logs = self.read_agent_stderr(agent);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
status.code(), status.code(),
&message, &message,
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Agent, TerminatedBy::Agent,
logs,
) )
.await; .await;
} }
@ -2267,12 +2285,14 @@ impl SessionManager {
) )
.await; .await;
} }
let logs = self.read_agent_stderr(agent);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
None, None,
&message, &message,
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs,
) )
.await; .await;
} }
@ -2287,12 +2307,14 @@ impl SessionManager {
) )
.await; .await;
} }
let logs = self.read_agent_stderr(agent);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
None, None,
&message, &message,
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs,
) )
.await; .await;
} }
@ -2419,6 +2441,7 @@ impl SessionManager {
message: &str, message: &str,
reason: SessionEndReason, reason: SessionEndReason,
terminated_by: TerminatedBy, terminated_by: TerminatedBy,
stderr: Option<StderrOutput>,
) { ) {
let mut sessions = self.sessions.lock().await; let mut sessions = self.sessions.lock().await;
if let Some(session) = Self::session_mut(&mut sessions, session_id) { if let Some(session) = Self::session_mut(&mut sessions, session_id) {
@ -2431,11 +2454,20 @@ impl SessionManager {
reason.clone(), reason.clone(),
terminated_by.clone(), terminated_by.clone(),
); );
let (error_message, error_exit_code, error_stderr) =
if reason == SessionEndReason::Error {
(Some(message.to_string()), exit_code, stderr)
} else {
(None, None, None)
};
let ended = EventConversion::new( let ended = EventConversion::new(
UniversalEventType::SessionEnded, UniversalEventType::SessionEnded,
UniversalEventData::SessionEnded(SessionEndedData { UniversalEventData::SessionEnded(SessionEndedData {
reason, reason,
terminated_by, terminated_by,
message: error_message,
exit_code: error_exit_code,
stderr: error_stderr,
}), }),
) )
.synthetic() .synthetic()
@ -2493,12 +2525,14 @@ impl SessionManager {
None, None,
) )
.await; .await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
None, None,
"opencode server unavailable", "opencode server unavailable",
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs,
) )
.await; .await;
return; return;
@ -2516,12 +2550,14 @@ impl SessionManager {
None, None,
) )
.await; .await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
None, None,
"opencode sse connection failed", "opencode sse connection failed",
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs,
) )
.await; .await;
return; return;
@ -2538,12 +2574,14 @@ impl SessionManager {
None, None,
) )
.await; .await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
None, None,
"opencode sse error", "opencode sse error",
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs,
) )
.await; .await;
return; return;
@ -2562,12 +2600,14 @@ impl SessionManager {
None, None,
) )
.await; .await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended( self.mark_session_ended(
&session_id, &session_id,
None, None,
"opencode sse stream error", "opencode sse stream error",
SessionEndReason::Error, SessionEndReason::Error,
TerminatedBy::Daemon, TerminatedBy::Daemon,
logs,
) )
.await; .await;
return; return;
@ -5804,6 +5844,9 @@ fn mock_session_end_sequence(_prefix: &str) -> Vec<EventConversion> {
UniversalEventData::SessionEnded(SessionEndedData { UniversalEventData::SessionEnded(SessionEndedData {
reason: SessionEndReason::Completed, reason: SessionEndReason::Completed,
terminated_by: TerminatedBy::Agent, terminated_by: TerminatedBy::Agent,
message: None,
exit_code: None,
stderr: None,
}), }),
) )
.synthetic()] .synthetic()]

View file

@ -109,6 +109,9 @@ pub fn event_to_universal(event: &schema::StreamJsonMessage) -> Result<Vec<Event
UniversalEventData::SessionEnded(SessionEndedData { UniversalEventData::SessionEnded(SessionEndedData {
reason: SessionEndReason::Completed, reason: SessionEndReason::Completed,
terminated_by: TerminatedBy::Agent, terminated_by: TerminatedBy::Agent,
message: None,
exit_code: None,
stderr: None,
}), }),
) )
.with_raw(serde_json::to_value(event).ok()), .with_raw(serde_json::to_value(event).ok()),

View file

@ -521,6 +521,9 @@ pub fn session_ended_event(thread_id: &str, reason: SessionEndReason) -> EventCo
UniversalEventData::SessionEnded(SessionEndedData { UniversalEventData::SessionEnded(SessionEndedData {
reason, reason,
terminated_by: TerminatedBy::Agent, terminated_by: TerminatedBy::Agent,
message: None,
exit_code: None,
stderr: None,
}), }),
) )
.with_native_session(Some(thread_id.to_string())) .with_native_session(Some(thread_id.to_string()))

View file

@ -79,9 +79,33 @@ pub struct SessionStartedData {
pub struct SessionEndedData { pub struct SessionEndedData {
pub reason: SessionEndReason, pub reason: SessionEndReason,
pub terminated_by: TerminatedBy, pub terminated_by: TerminatedBy,
/// Error message when reason is Error
#[serde(default, skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
/// Process exit code when reason is Error
#[serde(default, skip_serializing_if = "Option::is_none")]
pub exit_code: Option<i32>,
/// Agent stderr output when reason is Error
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stderr: Option<StderrOutput>,
} }
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
// Structured stderr payload carried in the optional `stderr` field of
// `SessionEndedData`.
//
// NOTE: the `///` field docs below appear verbatim as `description` strings in
// the generated JSON schema, so rewording them means regenerating the schema
// artifacts. Plain `//` comments are used here for that reason.
pub struct StderrOutput {
/// First N lines of stderr (if truncated) or full stderr (if not truncated)
// Optional on the wire: defaults to `None` when absent and is omitted from
// serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub head: Option<String>,
/// Last N lines of stderr (only present if truncated)
// Same optional wire handling as `head`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub tail: Option<String>,
/// Whether the output was truncated
// The only field without a serde default, hence the only one listed as
// required in the generated schema.
pub truncated: bool,
/// Total number of lines in stderr
// Same optional wire handling as `head`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub total_lines: Option<usize>,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum SessionEndReason { pub enum SessionEndReason {
Completed, Completed,

View file

@ -436,9 +436,35 @@
"terminated_by" "terminated_by"
], ],
"properties": { "properties": {
"exit_code": {
"description": "Process exit code when reason is Error",
"type": [
"integer",
"null"
],
"format": "int32"
},
"message": {
"description": "Error message when reason is Error",
"type": [
"string",
"null"
]
},
"reason": { "reason": {
"$ref": "#/definitions/SessionEndReason" "$ref": "#/definitions/SessionEndReason"
}, },
"stderr": {
"description": "Agent stderr output when reason is Error",
"anyOf": [
{
"$ref": "#/definitions/StderrOutput"
},
{
"type": "null"
}
]
},
"terminated_by": { "terminated_by": {
"$ref": "#/definitions/TerminatedBy" "$ref": "#/definitions/TerminatedBy"
} }
@ -450,6 +476,41 @@
"metadata": true "metadata": true
} }
}, },
"StderrOutput": {
"type": "object",
"required": [
"truncated"
],
"properties": {
"head": {
"description": "First N lines of stderr (if truncated) or full stderr (if not truncated)",
"type": [
"string",
"null"
]
},
"tail": {
"description": "Last N lines of stderr (only present if truncated)",
"type": [
"string",
"null"
]
},
"total_lines": {
"description": "Total number of lines in stderr",
"type": [
"integer",
"null"
],
"format": "uint",
"minimum": 0.0
},
"truncated": {
"description": "Whether the output was truncated",
"type": "boolean"
}
}
},
"TerminatedBy": { "TerminatedBy": {
"type": "string", "type": "string",
"enum": [ "enum": [