mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 06:04:43 +00:00
chore: improve error output
This commit is contained in:
parent
ee9ad25069
commit
267269db90
7 changed files with 363 additions and 50 deletions
|
|
@ -25,6 +25,12 @@ export type SandboxAgentSpawnHandle = {
|
|||
dispose: () => Promise<void>;
|
||||
};
|
||||
|
||||
/**
 * Handle for diagnostic state collected from a spawned subprocess.
 */
type ProcessDiagnostics = {
  /** Returns the recorded spawn error, or `undefined` when none has been recorded. */
  getSpawnError: () => Error | undefined;
  /** Returns a text block describing captured output; callers append it to error messages. */
  format: () => string;
  /** Releases whatever the diagnostics object attached to the subprocess. */
  dispose: () => void;
};
|
||||
|
||||
const PLATFORM_PACKAGES: Record<string, string> = {
|
||||
"darwin-arm64": "@sandbox-agent/cli-darwin-arm64",
|
||||
"darwin-x64": "@sandbox-agent/cli-darwin-x64",
|
||||
|
|
@ -35,6 +41,7 @@ const PLATFORM_PACKAGES: Record<string, string> = {
|
|||
|
||||
const TRUST_PACKAGES =
|
||||
"@sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64";
|
||||
const PROCESS_OUTPUT_TAIL_CHARS = 16_384;
|
||||
|
||||
export function isNodeRuntime(): boolean {
|
||||
return typeof process !== "undefined" && !!process.versions?.node;
|
||||
|
|
@ -101,6 +108,7 @@ export async function spawnSandboxAgent(
|
|||
|
||||
const stdio = logMode === "inherit" ? "inherit" : logMode === "silent" ? "ignore" : "pipe";
|
||||
const args = ["server", "--host", bindHost, "--port", String(port), "--token", token];
|
||||
const command = formatCommand(binaryPath, args);
|
||||
const child = spawn(binaryPath, args, {
|
||||
stdio,
|
||||
env: {
|
||||
|
|
@ -108,15 +116,34 @@ export async function spawnSandboxAgent(
|
|||
...(options.env ?? {}),
|
||||
},
|
||||
});
|
||||
const diagnostics = attachProcessDiagnostics(child, logMode);
|
||||
const cleanup = registerProcessCleanup(child);
|
||||
|
||||
const baseUrl = `http://${connectHost}:${port}`;
|
||||
const ready = waitForHealth(baseUrl, fetcher ?? globalThis.fetch, timeoutMs, child, token);
|
||||
|
||||
await ready;
|
||||
const ready = waitForHealth(
|
||||
baseUrl,
|
||||
fetcher ?? globalThis.fetch,
|
||||
timeoutMs,
|
||||
child,
|
||||
token,
|
||||
command,
|
||||
diagnostics,
|
||||
);
|
||||
try {
|
||||
await ready;
|
||||
} catch (err) {
|
||||
if (child.exitCode === null && child.signalCode === null) {
|
||||
child.kill("SIGTERM");
|
||||
await waitForExit(child, 1_000);
|
||||
}
|
||||
diagnostics.dispose();
|
||||
cleanup.dispose();
|
||||
throw err;
|
||||
}
|
||||
|
||||
const dispose = async () => {
|
||||
if (child.exitCode !== null) {
|
||||
if (child.exitCode !== null || child.signalCode !== null) {
|
||||
diagnostics.dispose();
|
||||
cleanup.dispose();
|
||||
return;
|
||||
}
|
||||
|
|
@ -125,6 +152,7 @@ export async function spawnSandboxAgent(
|
|||
if (!exited) {
|
||||
child.kill("SIGKILL");
|
||||
}
|
||||
diagnostics.dispose();
|
||||
cleanup.dispose();
|
||||
};
|
||||
|
||||
|
|
@ -195,6 +223,8 @@ async function waitForHealth(
|
|||
timeoutMs: number,
|
||||
child: ChildProcess,
|
||||
token: string,
|
||||
command: string,
|
||||
diagnostics: ProcessDiagnostics,
|
||||
): Promise<void> {
|
||||
if (!fetcher) {
|
||||
throw new Error("Fetch API is not available; provide a fetch implementation.");
|
||||
|
|
@ -203,8 +233,17 @@ async function waitForHealth(
|
|||
let lastError: string | undefined;
|
||||
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
if (child.exitCode !== null) {
|
||||
throw new Error("sandbox-agent exited before becoming healthy.");
|
||||
const spawnError = diagnostics.getSpawnError();
|
||||
if (spawnError) {
|
||||
throw new Error(
|
||||
`Failed to spawn sandbox-agent subprocess \`${command}\`: ${spawnError.message}${diagnostics.format()}`,
|
||||
);
|
||||
}
|
||||
|
||||
if (child.exitCode !== null || child.signalCode !== null) {
|
||||
throw new Error(
|
||||
`sandbox-agent exited before becoming healthy (exitCode=${child.exitCode ?? "null"}, signal=${child.signalCode ?? "none"}).${diagnostics.format()}`,
|
||||
);
|
||||
}
|
||||
try {
|
||||
const response = await fetcher(`${baseUrl}/v1/health`, {
|
||||
|
|
@ -220,7 +259,9 @@ async function waitForHealth(
|
|||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
}
|
||||
|
||||
throw new Error(`Timed out waiting for sandbox-agent health (${lastError ?? "unknown error"}).`);
|
||||
throw new Error(
|
||||
`Timed out waiting for sandbox-agent health (${lastError ?? "unknown error"}).${diagnostics.format()}`,
|
||||
);
|
||||
}
|
||||
|
||||
async function waitForExit(child: ChildProcess, timeoutMs: number): Promise<boolean> {
|
||||
|
|
@ -255,3 +296,74 @@ function registerProcessCleanup(child: ChildProcess): { dispose: () => void } {
|
|||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Renders a binary path and its arguments as a single display string.
 *
 * Every element (the binary first, then each argument in order) is passed
 * through `shellQuote`, and the quoted pieces are joined with single spaces.
 *
 * @param binaryPath Path of the executable.
 * @param args Arguments passed to the executable.
 * @returns The quoted, space-separated command line.
 */
function formatCommand(binaryPath: string, args: string[]): string {
  const quoted: string[] = [];
  for (const token of [binaryPath, ...args]) {
    quoted.push(shellQuote(token));
  }
  return quoted.join(" ");
}
|
||||
|
||||
/**
 * Wraps `value` in single quotes.
 *
 * Each single quote inside `value` is rewritten as `'\''` (close the quoted
 * run, emit a backslash-escaped quote, reopen the quoted run).
 *
 * @param value Raw text to quote.
 * @returns The single-quoted text.
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
||||
|
||||
/**
 * Appends `chunk` to `current` and keeps only the trailing
 * `PROCESS_OUTPUT_TAIL_CHARS` characters of the result.
 *
 * @param current Text accumulated so far.
 * @param chunk Newly received text.
 * @returns The concatenation, cut from the front when it exceeds the limit.
 */
function appendTail(current: string, chunk: string): string {
  const combined = `${current}${chunk}`;
  // Number of leading characters that no longer fit in the retained window.
  const overflow = combined.length - PROCESS_OUTPUT_TAIL_CHARS;
  return overflow > 0 ? combined.slice(overflow) : combined;
}
|
||||
|
||||
/**
 * Attaches listeners to `child` that collect information for error messages.
 *
 * - The value delivered with the child's `"error"` event is remembered and
 *   exposed through `getSpawnError`.
 * - When `logMode` is `"pipe"`, a bounded tail (see `appendTail`) of each
 *   available output stream is kept; stdout is registered before stderr.
 *
 * @param child The spawned subprocess.
 * @param logMode Log mode the subprocess was spawned with.
 * @returns Accessors for the collected state plus a `dispose` that removes
 *   every listener registered here.
 */
function attachProcessDiagnostics(
  child: ChildProcess,
  logMode: SandboxAgentSpawnLogMode,
): ProcessDiagnostics {
  const tails = { stdout: "", stderr: "" };
  let spawnError: Error | undefined;
  const removers: Array<() => void> = [];

  const recordSpawnError = (error: Error) => {
    spawnError = error;
  };
  child.on("error", recordSpawnError);
  removers.push(() => child.off("error", recordSpawnError));

  if (logMode === "pipe") {
    for (const name of ["stdout", "stderr"] as const) {
      const stream = child[name];
      if (!stream) {
        continue;
      }
      const onData = (chunk: string | Buffer) => {
        tails[name] = appendTail(tails[name], chunk.toString());
      };
      stream.on("data", onData);
      // Look the stream up again at dispose time, as it may be absent by then.
      removers.push(() => child[name]?.off("data", onData));
    }
  }

  return {
    getSpawnError: () => spawnError,
    format: () => {
      const sections: string[] = [];

      const out = tails.stdout.trim();
      if (out.length > 0) {
        sections.push(`stdout:\n${out}`);
      }

      const err = tails.stderr.trim();
      if (err.length > 0) {
        sections.push(`stderr:\n${err}`);
      }

      // Nothing captured: contribute nothing to the caller's message.
      return sections.length === 0
        ? ""
        : `\n--- subprocess output tail ---\n${sections.join("\n")}`;
    },
    dispose: () => {
      removers.forEach((remove) => {
        remove();
      });
    },
  };
}
|
||||
|
|
|
|||
|
|
@ -103,10 +103,10 @@ fn map_error(err: AdapterError) -> Response {
|
|||
"serialize_failed",
|
||||
&format!("failed to serialize JSON payload: {ser}"),
|
||||
),
|
||||
AdapterError::Spawn(spawn) => problem(
|
||||
AdapterError::Spawn { command, error } => problem(
|
||||
StatusCode::BAD_GATEWAY,
|
||||
"spawn_failed",
|
||||
&format!("failed to start agent process: {spawn}"),
|
||||
&format!("failed to start agent process `{command}`: {error}"),
|
||||
),
|
||||
AdapterError::MissingStdin | AdapterError::MissingStdout | AdapterError::MissingStderr => {
|
||||
problem(
|
||||
|
|
|
|||
|
|
@ -19,8 +19,12 @@ const RING_BUFFER_SIZE: usize = 1024;
|
|||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum AdapterError {
|
||||
#[error("failed to spawn subprocess: {0}")]
|
||||
Spawn(std::io::Error),
|
||||
#[error("failed to spawn subprocess `{command}`: {error}")]
|
||||
Spawn {
|
||||
command: String,
|
||||
#[source]
|
||||
error: std::io::Error,
|
||||
},
|
||||
#[error("failed to capture subprocess stdin")]
|
||||
MissingStdin,
|
||||
#[error("failed to capture subprocess stdout")]
|
||||
|
|
@ -87,13 +91,18 @@ impl AdapterRuntime {
|
|||
"spawning agent process"
|
||||
);
|
||||
|
||||
let command_for_display = format_command_for_display(&launch.program, &launch.args);
|
||||
let mut child = command.spawn().map_err(|err| {
|
||||
tracing::error!(
|
||||
program = ?launch.program,
|
||||
args = ?launch.args,
|
||||
error = %err,
|
||||
"failed to spawn agent process"
|
||||
);
|
||||
AdapterError::Spawn(err)
|
||||
AdapterError::Spawn {
|
||||
command: command_for_display.clone(),
|
||||
error: err,
|
||||
}
|
||||
})?;
|
||||
|
||||
let pid = child.id().unwrap_or(0);
|
||||
|
|
@ -565,3 +574,16 @@ impl AdapterRuntime {
|
|||
fn id_key(value: &Value) -> String {
|
||||
serde_json::to_string(value).unwrap_or_else(|_| "null".to_string())
|
||||
}
|
||||
|
||||
fn format_command_for_display(program: &std::path::Path, args: &[String]) -> String {
|
||||
let mut parts = Vec::with_capacity(args.len() + 1);
|
||||
parts.push(shell_quote(program.to_string_lossy().as_ref()));
|
||||
for arg in args {
|
||||
parts.push(shell_quote(arg));
|
||||
}
|
||||
parts.join(" ")
|
||||
}
|
||||
|
||||
/// Wraps `value` in single quotes, rewriting every embedded single quote as
/// `'\''` (close the quoted run, emit an escaped quote, reopen).
fn shell_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('\'');
    for ch in value.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use url::Url;
|
|||
|
||||
const DEFAULT_ACP_REGISTRY_URL: &str =
|
||||
"https://cdn.agentclientprotocol.com/registry/v1/latest/registry.json";
|
||||
const VERIFY_OUTPUT_TAIL_BYTES: usize = 16 * 1024;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
|
|
@ -875,28 +876,78 @@ fn write_text_file(path: &Path, contents: &str) -> Result<(), AgentError> {
|
|||
|
||||
fn verify_command(path: &Path, args: &[&str]) -> Result<(), AgentError> {
|
||||
let mut command = Command::new(path);
|
||||
if args.is_empty() {
|
||||
command.arg("--help");
|
||||
let effective_args = if args.is_empty() {
|
||||
vec!["--help"]
|
||||
} else {
|
||||
command.args(args);
|
||||
}
|
||||
command.stdout(Stdio::null()).stderr(Stdio::null());
|
||||
args.to_vec()
|
||||
};
|
||||
command.args(&effective_args);
|
||||
command.stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
|
||||
match command.status() {
|
||||
Ok(status) if status.success() => Ok(()),
|
||||
Ok(status) => Err(AgentError::VerifyFailed(format!(
|
||||
"{} exited with status {}",
|
||||
path.display(),
|
||||
status
|
||||
match command.output() {
|
||||
Ok(output) if output.status.success() => Ok(()),
|
||||
Ok(output) => Err(AgentError::VerifyFailed(format_subprocess_failure(
|
||||
path,
|
||||
&effective_args,
|
||||
output.status.to_string(),
|
||||
&output.stdout,
|
||||
&output.stderr,
|
||||
))),
|
||||
Err(err) => Err(AgentError::VerifyFailed(format!(
|
||||
"{} failed to execute: {}",
|
||||
path.display(),
|
||||
err
|
||||
"failed to execute `{}`: {err}",
|
||||
format_command_for_display(path, &effective_args),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_subprocess_failure(
|
||||
path: &Path,
|
||||
args: &[&str],
|
||||
status: String,
|
||||
stdout: &[u8],
|
||||
stderr: &[u8],
|
||||
) -> String {
|
||||
format!(
|
||||
"verification failed for `{}`\nstatus: {}\nstdout:\n{}\nstderr:\n{}",
|
||||
format_command_for_display(path, args),
|
||||
status,
|
||||
format_output_tail(stdout),
|
||||
format_output_tail(stderr),
|
||||
)
|
||||
}
|
||||
|
||||
fn format_command_for_display(path: &Path, args: &[&str]) -> String {
|
||||
let mut parts = Vec::with_capacity(args.len() + 1);
|
||||
parts.push(shell_quote(path.to_string_lossy().as_ref()));
|
||||
for arg in args {
|
||||
parts.push(shell_quote(arg));
|
||||
}
|
||||
parts.join(" ")
|
||||
}
|
||||
|
||||
fn shell_quote(value: &str) -> String {
|
||||
format!("'{}'", shell_escape(value))
|
||||
}
|
||||
|
||||
fn format_output_tail(bytes: &[u8]) -> String {
|
||||
if bytes.is_empty() {
|
||||
return "<empty>".to_string();
|
||||
}
|
||||
|
||||
let start = bytes.len().saturating_sub(VERIFY_OUTPUT_TAIL_BYTES);
|
||||
let suffix = String::from_utf8_lossy(&bytes[start..]).to_string();
|
||||
let text = suffix.trim();
|
||||
if text.is_empty() {
|
||||
return "<empty>".to_string();
|
||||
}
|
||||
|
||||
if start > 0 {
|
||||
format!("[truncated {} bytes]\n{}", start, text)
|
||||
} else {
|
||||
text.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_registry(url: &Url) -> Result<RegistryDocument, AgentError> {
|
||||
let client = Client::builder().build()?;
|
||||
let response = client.get(url.clone()).send()?;
|
||||
|
|
@ -1653,4 +1704,37 @@ mod tests {
|
|||
"cursor re-install should be idempotent"
|
||||
);
|
||||
}
|
||||
|
||||
/// A failing executable must produce a `VerifyFailed` message that carries
/// the failure prefix, the exit status, and both captured output streams.
#[cfg(unix)]
#[test]
fn verify_command_failure_contains_status_and_stdio() {
    let temp_dir = tempfile::tempdir().expect("create tempdir");
    let failing = temp_dir.path().join("failing");
    // Script that writes to both streams and exits with status 42.
    write_exec(
        &failing,
        "#!/usr/bin/env sh\necho 'hello from stdout'\necho 'boom on stderr' 1>&2\nexit 42\n",
    );

    let err = verify_command(&failing, &[]).expect_err("verify should fail");
    let message = match err {
        AgentError::VerifyFailed(message) => message,
        _ => panic!("expected VerifyFailed"),
    };

    let expectations = [
        ("verification failed for", "missing prefix"),
        ("status: exit status: 42", "missing exit status"),
        ("stdout:\nhello from stdout", "missing stdout"),
        ("stderr:\nboom on stderr", "missing stderr"),
    ];
    for (needle, failure) in expectations {
        assert!(message.contains(needle), "{failure}");
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use std::fs::{self, OpenOptions};
|
||||
use std::fs::{self, File, OpenOptions};
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::net::TcpListener;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Child, Command, ExitStatus, Stdio};
|
||||
|
|
@ -15,6 +16,7 @@ const HEALTH_ENDPOINTS: [&str; 4] = ["health", "healthz", "app/agents", "agents"
|
|||
const HEALTH_ATTEMPTS: usize = 20;
|
||||
const HEALTH_DELAY_MS: u64 = 150;
|
||||
const MONITOR_DELAY_MS: u64 = 500;
|
||||
const OPENCODE_LOG_TAIL_BYTES: usize = 16 * 1024;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OpenCodeServerManagerConfig {
|
||||
|
|
@ -157,7 +159,20 @@ impl OpenCodeServerManager {
|
|||
sleep(Duration::from_millis(HEALTH_DELAY_MS)).await;
|
||||
}
|
||||
|
||||
Err("OpenCode server health check failed".to_string())
|
||||
let log_path = opencode_log_path(&self.inner.config.log_dir);
|
||||
let mut message = format!(
|
||||
"OpenCode server health check failed (logs: {})",
|
||||
log_path.display()
|
||||
);
|
||||
match read_log_tail(&log_path, OPENCODE_LOG_TAIL_BYTES) {
|
||||
Some(tail) if !tail.trim().is_empty() => {
|
||||
message.push_str("\n--- log tail ---\n");
|
||||
message.push_str(tail.trim());
|
||||
}
|
||||
_ => message.push_str("\n(log file is empty or unavailable)"),
|
||||
}
|
||||
|
||||
Err(message)
|
||||
}
|
||||
|
||||
async fn spawn_http_server(&self) -> Result<(String, Arc<StdMutex<Option<Child>>>), String> {
|
||||
|
|
@ -169,16 +184,26 @@ impl OpenCodeServerManager {
|
|||
.resolve_binary(AgentId::Opencode)
|
||||
.map_err(|err| err.to_string())?;
|
||||
let port = find_available_port()?;
|
||||
let mut command = Command::new(path);
|
||||
let stderr = open_opencode_log(&log_dir).unwrap_or_else(|_| Stdio::null());
|
||||
let command_preview = format!("{} serve --port {port}", path.display());
|
||||
let mut command = Command::new(&path);
|
||||
let log_path = opencode_log_path(&log_dir);
|
||||
let log_file = open_opencode_log_file(&log_dir)?;
|
||||
let log_file_err = log_file
|
||||
.try_clone()
|
||||
.map_err(|err| format!("failed to clone OpenCode log file: {err}"))?;
|
||||
command
|
||||
.arg("serve")
|
||||
.arg("--port")
|
||||
.arg(port.to_string())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(stderr);
|
||||
.stdout(Stdio::from(log_file))
|
||||
.stderr(Stdio::from(log_file_err));
|
||||
|
||||
let child = command.spawn().map_err(|err| err.to_string())?;
|
||||
let child = command.spawn().map_err(|err| {
|
||||
format!(
|
||||
"failed to spawn OpenCode server `{command_preview}` (logs: {}): {err}",
|
||||
log_path.display()
|
||||
)
|
||||
})?;
|
||||
Ok::<(String, Child), String>((format!("http://127.0.0.1:{port}"), child))
|
||||
})
|
||||
.await
|
||||
|
|
@ -263,16 +288,29 @@ fn default_log_dir() -> PathBuf {
|
|||
base
|
||||
}
|
||||
|
||||
fn open_opencode_log(log_dir: &Path) -> Result<Stdio, String> {
|
||||
/// Returns `<log_dir>/opencode/opencode-compat.log`.
fn opencode_log_path(log_dir: &Path) -> PathBuf {
    let mut path = log_dir.join("opencode");
    path.push("opencode-compat.log");
    path
}
|
||||
|
||||
fn open_opencode_log_file(log_dir: &Path) -> Result<File, String> {
|
||||
let directory = log_dir.join("opencode");
|
||||
fs::create_dir_all(&directory).map_err(|err| err.to_string())?;
|
||||
let path = directory.join("opencode-compat.log");
|
||||
let file = OpenOptions::new()
|
||||
OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(path)
|
||||
.map_err(|err| err.to_string())?;
|
||||
Ok(file.into())
|
||||
.open(opencode_log_path(log_dir))
|
||||
.map_err(|err| err.to_string())
|
||||
}
|
||||
|
||||
/// Reads the end of the file at `path` and returns it as text.
///
/// Reading starts `max_bytes` before the file length reported by its
/// metadata (or at the beginning for shorter files) and continues to end of
/// file; the bytes are decoded lossily as UTF-8.
///
/// Returns `None` if the file cannot be opened, inspected, seeked, or read.
fn read_log_tail(path: &Path, max_bytes: usize) -> Option<String> {
    let mut log = File::open(path).ok()?;
    let offset = log
        .metadata()
        .ok()
        .map(|meta| meta.len().saturating_sub(max_bytes as u64))?;
    log.seek(SeekFrom::Start(offset)).ok()?;

    let mut tail = Vec::new();
    match log.read_to_end(&mut tail) {
        Ok(_) => Some(String::from_utf8_lossy(&tail).into_owned()),
        Err(_) => None,
    }
}
|
||||
|
||||
fn find_available_port() -> Result<u16, String> {
|
||||
|
|
|
|||
|
|
@ -446,8 +446,8 @@ fn map_adapter_error(err: AdapterError) -> SandboxError {
|
|||
AdapterError::Write(error) => SandboxError::StreamError {
|
||||
message: format!("failed writing to agent stdin: {error}"),
|
||||
},
|
||||
AdapterError::Spawn(error) => SandboxError::StreamError {
|
||||
message: format!("failed to start agent process: {error}"),
|
||||
AdapterError::Spawn { command, error } => SandboxError::StreamError {
|
||||
message: format!("failed to start agent process `{command}`: {error}"),
|
||||
},
|
||||
AdapterError::MissingStdin | AdapterError::MissingStdout | AdapterError::MissingStderr => {
|
||||
SandboxError::StreamError {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use std::fs;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Child, Command as ProcessCommand, Stdio};
|
||||
use std::time::{Duration, Instant};
|
||||
|
|
@ -15,6 +16,7 @@ pub use build_id::BUILD_ID;
|
|||
const DAEMON_HEALTH_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const HEALTH_CHECK_CONNECT_TIMEOUT: Duration = Duration::from_secs(2);
|
||||
const HEALTH_CHECK_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const DAEMON_LOG_TAIL_BYTES: usize = 16 * 1024;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Paths
|
||||
|
|
@ -187,6 +189,7 @@ pub fn wait_for_health(
|
|||
base_url: &str,
|
||||
token: Option<&str>,
|
||||
timeout: Duration,
|
||||
log_path: Option<&Path>,
|
||||
) -> Result<(), CliError> {
|
||||
let client = HttpClient::builder()
|
||||
.connect_timeout(HEALTH_CHECK_CONNECT_TIMEOUT)
|
||||
|
|
@ -199,8 +202,9 @@ pub fn wait_for_health(
|
|||
attempts += 1;
|
||||
if let Some(child) = server_child.as_mut() {
|
||||
if let Some(status) = child.try_wait()? {
|
||||
return Err(CliError::Server(format!(
|
||||
"sandbox-agent exited before becoming healthy ({status})"
|
||||
return Err(CliError::Server(with_log_tail(
|
||||
format!("sandbox-agent exited before becoming healthy ({status})"),
|
||||
log_path,
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
|
@ -248,9 +252,10 @@ pub fn wait_for_health(
|
|||
timeout_ms = timeout.as_millis(),
|
||||
"timed out waiting for daemon health"
|
||||
);
|
||||
Err(CliError::Server(
|
||||
Err(CliError::Server(with_log_tail(
|
||||
"timed out waiting for sandbox-agent health".to_string(),
|
||||
))
|
||||
log_path,
|
||||
)))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -271,7 +276,8 @@ pub fn spawn_sandbox_agent_daemon(
|
|||
let log_file_err = log_file.try_clone()?;
|
||||
|
||||
let exe = std::env::current_exe()?;
|
||||
let mut cmd = ProcessCommand::new(exe);
|
||||
let command_preview = format!("{} server --host {} --port {}", exe.display(), host, port);
|
||||
let mut cmd = ProcessCommand::new(&exe);
|
||||
cmd.arg("server")
|
||||
.arg("--host")
|
||||
.arg(host)
|
||||
|
|
@ -286,7 +292,12 @@ pub fn spawn_sandbox_agent_daemon(
|
|||
cmd.arg("--token").arg(token);
|
||||
}
|
||||
|
||||
cmd.spawn().map_err(CliError::from)
|
||||
cmd.spawn().map_err(|err| {
|
||||
CliError::Server(format!(
|
||||
"failed to spawn daemon subprocess `{command_preview}` (logs: {}): {err}",
|
||||
log_path.display()
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -384,7 +395,13 @@ pub fn start(cli: &CliConfig, host: &str, port: u16, token: Option<&str>) -> Res
|
|||
if let Some(pid) = read_pid(&pid_path) {
|
||||
if is_process_running(pid) {
|
||||
eprintln!("daemon process {pid} exists; waiting for health");
|
||||
return wait_for_health(None, &base_url, token, DAEMON_HEALTH_TIMEOUT);
|
||||
return wait_for_health(
|
||||
None,
|
||||
&base_url,
|
||||
token,
|
||||
DAEMON_HEALTH_TIMEOUT,
|
||||
Some(&log_path),
|
||||
);
|
||||
}
|
||||
let _ = remove_pid(&pid_path);
|
||||
}
|
||||
|
|
@ -399,7 +416,13 @@ pub fn start(cli: &CliConfig, host: &str, port: u16, token: Option<&str>) -> Res
|
|||
write_pid(&pid_path, pid)?;
|
||||
write_daemon_version(host, port)?;
|
||||
|
||||
let result = wait_for_health(Some(&mut child), &base_url, token, DAEMON_HEALTH_TIMEOUT);
|
||||
let result = wait_for_health(
|
||||
Some(&mut child),
|
||||
&base_url,
|
||||
token,
|
||||
DAEMON_HEALTH_TIMEOUT,
|
||||
Some(&log_path),
|
||||
);
|
||||
if result.is_err() {
|
||||
let _ = remove_pid(&pid_path);
|
||||
let _ = remove_version_file(host, port);
|
||||
|
|
@ -572,7 +595,14 @@ pub fn ensure_running(
|
|||
if let Some(pid) = read_pid(&pid_path) {
|
||||
if is_process_running(pid) {
|
||||
eprintln!("daemon process {pid} running; waiting for health");
|
||||
return wait_for_health(None, &base_url, token, DAEMON_HEALTH_TIMEOUT);
|
||||
let log_path = daemon_log_path(host, port);
|
||||
return wait_for_health(
|
||||
None,
|
||||
&base_url,
|
||||
token,
|
||||
DAEMON_HEALTH_TIMEOUT,
|
||||
Some(&log_path),
|
||||
);
|
||||
}
|
||||
let _ = remove_pid(&pid_path);
|
||||
let _ = remove_version_file(host, port);
|
||||
|
|
@ -580,3 +610,30 @@ pub fn ensure_running(
|
|||
|
||||
start(cli, host, port, token)
|
||||
}
|
||||
|
||||
fn with_log_tail(message: String, log_path: Option<&Path>) -> String {
|
||||
let Some(log_path) = log_path else {
|
||||
return message;
|
||||
};
|
||||
|
||||
let mut output = format!("{message}\nlogs: {}", log_path.display());
|
||||
match read_log_tail(log_path, DAEMON_LOG_TAIL_BYTES) {
|
||||
Some(tail) if !tail.trim().is_empty() => {
|
||||
output.push_str("\n--- log tail ---\n");
|
||||
output.push_str(tail.trim());
|
||||
}
|
||||
_ => output.push_str("\n(log file is empty or unavailable)"),
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
/// Reads the end of the file at `path` and returns it as text.
///
/// Reading starts `max_bytes` before the file length reported by its
/// metadata (or at the beginning for shorter files) and continues to end of
/// file; the bytes are decoded lossily as UTF-8.
///
/// Returns `None` if the file cannot be opened, inspected, seeked, or read.
fn read_log_tail(path: &Path, max_bytes: usize) -> Option<String> {
    let mut log = fs::File::open(path).ok()?;
    let size = log.metadata().ok()?.len();
    let offset = size.saturating_sub(max_bytes as u64);
    log.seek(SeekFrom::Start(offset)).ok()?;

    let mut tail = Vec::new();
    if log.read_to_end(&mut tail).is_err() {
        return None;
    }
    Some(String::from_utf8_lossy(&tail).into_owned())
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue