feat: add structured stderr output for error diagnostics (#29)

Add StderrOutput schema with head/tail/truncated/total_lines fields to
provide better error diagnostics when agent processes fail.
This commit is contained in:
Nathan Flurry 2026-01-29 07:18:56 -08:00 committed by GitHub
parent 82ac0b3880
commit c7d6482fd4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 247 additions and 3 deletions

View file

@ -1,10 +1,15 @@
use std::fs::OpenOptions;
use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use sandbox_agent_error::SandboxError;
use time::{Duration, OffsetDateTime};
use sandbox_agent_universal_agent_schema::StderrOutput;
/// NOTE(review): presumably the number of days a log file is kept before it is
/// cleaned up; the code that consumes this value is outside this view — confirm.
const LOG_RETENTION_DAYS: i64 = 7;
/// Number of leading log lines `read_stderr` places in `head` when the log has
/// more than `LOG_HEAD_LINES + LOG_TAIL_LINES` lines.
const LOG_HEAD_LINES: usize = 20;
/// Number of trailing log lines `read_stderr` places in `tail` when the log has
/// more than `LOG_HEAD_LINES + LOG_TAIL_LINES` lines.
const LOG_TAIL_LINES: usize = 50;
/// Maximum length (compared against `String::len`, i.e. bytes) of a single log
/// line kept by `read_stderr`; longer lines are cut and suffixed with `...`.
const LOG_MAX_LINE_LENGTH: usize = 500;
pub struct AgentServerLogs {
base_dir: PathBuf,
@ -76,4 +81,70 @@ impl AgentServerLogs {
Ok(())
}
/// Read stderr from the current log file for error diagnostics.
///
/// The file read is `<base_dir>/<agent>/<agent>-YYYY-MM-DD.log`, where the date
/// is today's date in UTC.
///
/// Returns `None` when the file cannot be opened, its metadata cannot be read,
/// it is empty, or no line could be read from it.
///
/// Otherwise returns structured output:
/// * at most `LOG_HEAD_LINES + LOG_TAIL_LINES` lines: every line is returned
///   in `head`, `tail` is `None` and `truncated` is `false`;
/// * more lines: `head` holds the first `LOG_HEAD_LINES` lines, `tail` the
///   last `LOG_TAIL_LINES` lines and `truncated` is `true`.
///
/// `total_lines` is always the number of lines successfully read. Lines longer
/// than `LOG_MAX_LINE_LENGTH` bytes are shortened and suffixed with `...`.
pub fn read_stderr(&self) -> Option<StderrOutput> {
    let log_dir = self.base_dir.join(&self.agent);
    let now = OffsetDateTime::now_utc();
    let file_name = format!(
        "{}-{:04}-{:02}-{:02}.log",
        self.agent,
        now.year(),
        now.month() as u8,
        now.day()
    );
    let path = log_dir.join(file_name);

    let file = File::open(&path).ok()?;
    if file.metadata().ok()?.len() == 0 {
        return None;
    }

    // Only the first LOG_HEAD_LINES and the last LOG_TAIL_LINES lines are ever
    // returned, so keep just those instead of buffering the whole log file.
    let mut head: Vec<String> = Vec::with_capacity(LOG_HEAD_LINES);
    let mut tail: std::collections::VecDeque<String> =
        std::collections::VecDeque::with_capacity(LOG_TAIL_LINES);
    let mut line_count: usize = 0;

    for line_result in BufReader::new(file).lines() {
        // Best effort: stop at the first read error (e.g. invalid UTF-8) and
        // report whatever was collected up to that point.
        let line = match line_result {
            Ok(line) => line,
            Err(_) => break,
        };

        let line = if line.len() > LOG_MAX_LINE_LENGTH {
            // Slicing a `str` in the middle of a multi-byte character panics,
            // so back up to the nearest char boundary at or below the limit.
            // Offset 0 is always a boundary, so the loop terminates.
            let mut end = LOG_MAX_LINE_LENGTH;
            while !line.is_char_boundary(end) {
                end -= 1;
            }
            format!("{}...", &line[..end])
        } else {
            line
        };

        line_count += 1;
        if head.len() < LOG_HEAD_LINES {
            head.push(line);
        } else {
            // Rolling window over the most recent LOG_TAIL_LINES lines.
            if tail.len() >= LOG_TAIL_LINES {
                tail.pop_front();
            }
            tail.push_back(line);
        }
    }

    if line_count == 0 {
        return None;
    }

    if line_count <= LOG_HEAD_LINES + LOG_TAIL_LINES {
        // Small file - nothing was evicted from the window, so head followed
        // by tail is the complete content; return all of it in `head`.
        head.extend(tail);
        Some(StderrOutput {
            head: Some(head.join("\n")),
            tail: None,
            truncated: false,
            total_lines: Some(line_count),
        })
    } else {
        // Large file - return head and tail separately.
        let tail = Vec::from(tail);
        Some(StderrOutput {
            head: Some(head.join("\n")),
            tail: Some(tail.join("\n")),
            truncated: true,
            total_lines: Some(line_count),
        })
    }
}
}

View file

@ -24,7 +24,8 @@ use sandbox_agent_universal_agent_schema::{
AgentUnparsedData, ContentPart, ErrorData, EventConversion, EventSource, FileAction,
ItemDeltaData, ItemEventData, ItemKind, ItemRole, ItemStatus, PermissionEventData,
PermissionStatus, QuestionEventData, QuestionStatus, ReasoningVisibility, SessionEndReason,
SessionEndedData, SessionStartedData, TerminatedBy, UniversalEvent, UniversalEventData,
SessionEndedData, SessionStartedData, StderrOutput, TerminatedBy, UniversalEvent,
UniversalEventData,
UniversalEventType, UniversalItem,
};
use schemars::JsonSchema;
@ -1377,6 +1378,7 @@ impl AgentServerManager {
let owner = { self.owner.lock().expect("owner lock").clone() };
if let Some(owner) = owner.and_then(|weak| weak.upgrade()) {
let logs = owner.read_agent_stderr(agent);
for session_id in session_ids {
owner
.record_error(
@ -1393,6 +1395,7 @@ impl AgentServerManager {
message,
SessionEndReason::Error,
TerminatedBy::Daemon,
logs.clone(),
)
.await;
}
@ -1442,6 +1445,15 @@ impl SessionManager {
sessions.iter_mut().find(|session| session.session_id == session_id)
}
/// Fetch the agent's stderr log for inclusion in error diagnostics.
///
/// Builds an `AgentServerLogs` from the server manager's `log_base_dir` and
/// the agent's string name, then returns whatever its `read_stderr` yields.
fn read_agent_stderr(&self, agent: AgentId) -> Option<StderrOutput> {
    let base_dir = self.server_manager.log_base_dir.clone();
    AgentServerLogs::new(base_dir, agent.as_str()).read_stderr()
}
async fn create_session(
self: &Arc<Self>,
session_id: String,
@ -1697,6 +1709,9 @@ impl SessionManager {
UniversalEventData::SessionEnded(SessionEndedData {
reason: SessionEndReason::Terminated,
terminated_by: TerminatedBy::Daemon,
message: None,
exit_code: None,
stderr: None,
}),
)
.synthetic()
@ -2232,6 +2247,7 @@ impl SessionManager {
&message,
SessionEndReason::Completed,
TerminatedBy::Agent,
None,
)
.await;
}
@ -2247,12 +2263,14 @@ impl SessionManager {
)
.await;
}
let logs = self.read_agent_stderr(agent);
self.mark_session_ended(
&session_id,
status.code(),
&message,
SessionEndReason::Error,
TerminatedBy::Agent,
logs,
)
.await;
}
@ -2267,12 +2285,14 @@ impl SessionManager {
)
.await;
}
let logs = self.read_agent_stderr(agent);
self.mark_session_ended(
&session_id,
None,
&message,
SessionEndReason::Error,
TerminatedBy::Daemon,
logs,
)
.await;
}
@ -2287,12 +2307,14 @@ impl SessionManager {
)
.await;
}
let logs = self.read_agent_stderr(agent);
self.mark_session_ended(
&session_id,
None,
&message,
SessionEndReason::Error,
TerminatedBy::Daemon,
logs,
)
.await;
}
@ -2419,6 +2441,7 @@ impl SessionManager {
message: &str,
reason: SessionEndReason,
terminated_by: TerminatedBy,
stderr: Option<StderrOutput>,
) {
let mut sessions = self.sessions.lock().await;
if let Some(session) = Self::session_mut(&mut sessions, session_id) {
@ -2431,11 +2454,20 @@ impl SessionManager {
reason.clone(),
terminated_by.clone(),
);
let (error_message, error_exit_code, error_stderr) =
if reason == SessionEndReason::Error {
(Some(message.to_string()), exit_code, stderr)
} else {
(None, None, None)
};
let ended = EventConversion::new(
UniversalEventType::SessionEnded,
UniversalEventData::SessionEnded(SessionEndedData {
reason,
terminated_by,
message: error_message,
exit_code: error_exit_code,
stderr: error_stderr,
}),
)
.synthetic()
@ -2493,12 +2525,14 @@ impl SessionManager {
None,
)
.await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended(
&session_id,
None,
"opencode server unavailable",
SessionEndReason::Error,
TerminatedBy::Daemon,
logs,
)
.await;
return;
@ -2516,12 +2550,14 @@ impl SessionManager {
None,
)
.await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended(
&session_id,
None,
"opencode sse connection failed",
SessionEndReason::Error,
TerminatedBy::Daemon,
logs,
)
.await;
return;
@ -2538,12 +2574,14 @@ impl SessionManager {
None,
)
.await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended(
&session_id,
None,
"opencode sse error",
SessionEndReason::Error,
TerminatedBy::Daemon,
logs,
)
.await;
return;
@ -2562,12 +2600,14 @@ impl SessionManager {
None,
)
.await;
let logs = self.read_agent_stderr(AgentId::Opencode);
self.mark_session_ended(
&session_id,
None,
"opencode sse stream error",
SessionEndReason::Error,
TerminatedBy::Daemon,
logs,
)
.await;
return;
@ -5804,6 +5844,9 @@ fn mock_session_end_sequence(_prefix: &str) -> Vec<EventConversion> {
UniversalEventData::SessionEnded(SessionEndedData {
reason: SessionEndReason::Completed,
terminated_by: TerminatedBy::Agent,
message: None,
exit_code: None,
stderr: None,
}),
)
.synthetic()]

View file

@ -109,6 +109,9 @@ pub fn event_to_universal(event: &schema::StreamJsonMessage) -> Result<Vec<Event
UniversalEventData::SessionEnded(SessionEndedData {
reason: SessionEndReason::Completed,
terminated_by: TerminatedBy::Agent,
message: None,
exit_code: None,
stderr: None,
}),
)
.with_raw(serde_json::to_value(event).ok()),

View file

@ -521,6 +521,9 @@ pub fn session_ended_event(thread_id: &str, reason: SessionEndReason) -> EventCo
UniversalEventData::SessionEnded(SessionEndedData {
reason,
terminated_by: TerminatedBy::Agent,
message: None,
exit_code: None,
stderr: None,
}),
)
.with_native_session(Some(thread_id.to_string()))

View file

@ -79,9 +79,33 @@ pub struct SessionStartedData {
/// Payload carried by a `SessionEnded` event: why the session ended and who
/// ended it, plus optional error details.
///
/// The optional fields default to `None` when absent on deserialization and
/// are omitted from serialized output when `None`.
pub struct SessionEndedData {
pub reason: SessionEndReason,
pub terminated_by: TerminatedBy,
/// Error message when reason is Error
#[serde(default, skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
/// Process exit code when reason is Error
#[serde(default, skip_serializing_if = "Option::is_none")]
pub exit_code: Option<i32>,
/// Agent stderr output when reason is Error
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stderr: Option<StderrOutput>,
}
/// Structured stderr output for error diagnostics.
///
/// When `truncated` is `false`, `head` carries the full output and `tail` is
/// absent; when `truncated` is `true`, `head` and `tail` carry the first and
/// last lines respectively. The optional fields default to `None` when absent
/// on deserialization and are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
pub struct StderrOutput {
/// First N lines of stderr (if truncated) or full stderr (if not truncated)
#[serde(default, skip_serializing_if = "Option::is_none")]
pub head: Option<String>,
/// Last N lines of stderr (only present if truncated)
#[serde(default, skip_serializing_if = "Option::is_none")]
pub tail: Option<String>,
/// Whether the output was truncated
pub truncated: bool,
/// Total number of lines in stderr
#[serde(default, skip_serializing_if = "Option::is_none")]
pub total_lines: Option<usize>,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "snake_case")]
pub enum SessionEndReason {
Completed,