perf: improve startup instrumentation and replace npx with npm install (#208)

Add comprehensive tracing instrumentation across the entire agent startup path (gigacode CLI, ACP HTTP adapter, agent installation, and process spawning) to enable detailed performance profiling. Replace npm-based agent process launchers that use npx (incurring resolution overhead on every spawn) with pre-installed npm packages, reducing startup latency. Improve error diagnostics when agent processes crash by capturing exit codes and stderr tails. Update error handling to map exited processes to dedicated error variants with actionable error messages. Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
2026-04-16 15:02:39 +00:00 · 2026-03-06 12:05:19 -08:00 · 2026-03-06 12:05:19 -08:00 · e7656d78f0
commit e7656d78f0
parent 9ada842cf2
7 changed files with 691 additions and 78 deletions
--- a/server/packages/acp-http-adapter/src/app.rs
+++ b/server/packages/acp-http-adapter/src/app.rs
@ -93,6 +93,20 @@ fn map_error(err: AdapterError) -> Response {
            "timeout",
            "timed out waiting for agent response",
        ),
+        AdapterError::Exited { exit_code, stderr } => {
+            let detail = if let Some(stderr) = stderr {
+                format!(
+                    "agent process exited before responding (exit_code: {:?}, stderr: {})",
+                    exit_code, stderr
+                )
+            } else {
+                format!(
+                    "agent process exited before responding (exit_code: {:?})",
+                    exit_code
+                )
+            };
+            problem(StatusCode::BAD_GATEWAY, "agent_exited", &detail)
+        }
        AdapterError::Write(write) => problem(
            StatusCode::BAD_GATEWAY,
            "write_failed",
--- a/server/packages/acp-http-adapter/src/main.rs
+++ b/server/packages/acp-http-adapter/src/main.rs
@ -32,6 +32,7 @@ async fn main() {
 }

 async fn run() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    let started = std::time::Instant::now();
    tracing_subscriber::fmt()
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env()
@ -41,6 +42,12 @@ async fn run() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        .init();

    let cli = Cli::parse();
+    tracing::info!(
+        host = %cli.host,
+        port = cli.port,
+        startup_ms = started.elapsed().as_millis() as u64,
+        "acp-http-adapter.run: starting server"
+    );
    run_server(ServerConfig {
        host: cli.host,
        port: cli.port,
--- a/server/packages/acp-http-adapter/src/process.rs
+++ b/server/packages/acp-http-adapter/src/process.rs
@ -16,6 +16,7 @@ use tokio_stream::wrappers::BroadcastStream;
 use crate::registry::LaunchSpec;

 const RING_BUFFER_SIZE: usize = 1024;
+const STDERR_TAIL_SIZE: usize = 16;

 #[derive(Debug, Error)]
 pub enum AdapterError {
@ -33,6 +34,11 @@ pub enum AdapterError {
    Serialize(serde_json::Error),
    #[error("failed to write subprocess stdin: {0}")]
    Write(std::io::Error),
+    #[error("agent process exited before responding")]
+    Exited {
+        exit_code: Option<i32>,
+        stderr: Option<String>,
+    },
    #[error("timeout waiting for response")]
    Timeout,
 }
@ -61,6 +67,7 @@ pub struct AdapterRuntime {
    shutting_down: AtomicBool,
    spawned_at: Instant,
    first_stdout: Arc<AtomicBool>,
+    stderr_tail: Arc<Mutex<VecDeque<String>>>,
 }

 impl AdapterRuntime {
@ -120,6 +127,7 @@ impl AdapterRuntime {
            shutting_down: AtomicBool::new(false),
            spawned_at: spawn_start,
            first_stdout: Arc::new(AtomicBool::new(false)),
+            stderr_tail: Arc::new(Mutex::new(VecDeque::with_capacity(STDERR_TAIL_SIZE))),
        };

        runtime.spawn_stdout_loop(stdout);
@ -198,6 +206,16 @@ impl AdapterRuntime {
                        "post: response channel dropped (agent process may have exited)"
                    );
                    self.pending.lock().await.remove(&key);
+                    if let Some((exit_code, stderr)) = self.try_process_exit_info().await {
+                        tracing::error!(
+                            method = %method,
+                            id = %key,
+                            exit_code = ?exit_code,
+                            stderr = ?stderr,
+                            "post: agent process exited before response channel completed"
+                        );
+                        return Err(AdapterError::Exited { exit_code, stderr });
+                    }
                    Err(AdapterError::Timeout)
                }
                Err(_) => {
@ -213,6 +231,16 @@ impl AdapterRuntime {
                        "post: TIMEOUT waiting for agent response"
                    );
                    self.pending.lock().await.remove(&key);
+                    if let Some((exit_code, stderr)) = self.try_process_exit_info().await {
+                        tracing::error!(
+                            method = %method,
+                            id = %key,
+                            exit_code = ?exit_code,
+                            stderr = ?stderr,
+                            "post: agent process exited before timeout completed"
+                        );
+                        return Err(AdapterError::Exited { exit_code, stderr });
+                    }
                    Err(AdapterError::Timeout)
                }
            }
@ -445,6 +473,7 @@ impl AdapterRuntime {

    fn spawn_stderr_loop(&self, stderr: tokio::process::ChildStderr) {
        let spawned_at = self.spawned_at;
+        let stderr_tail = self.stderr_tail.clone();

        tokio::spawn(async move {
            let mut lines = BufReader::new(stderr).lines();
@ -452,6 +481,13 @@ impl AdapterRuntime {

            while let Ok(Some(line)) = lines.next_line().await {
                line_count += 1;
+                {
+                    let mut tail = stderr_tail.lock().await;
+                    tail.push_back(line.clone());
+                    while tail.len() > STDERR_TAIL_SIZE {
+                        tail.pop_front();
+                    }
+                }
                tracing::info!(
                    line_number = line_count,
                    age_ms = spawned_at.elapsed().as_millis() as u64,
@ -560,6 +596,28 @@ impl AdapterRuntime {
        tracing::debug!(method = method, id = %id, "stdin: write+flush complete");
        Ok(())
    }
+
+    async fn try_process_exit_info(&self) -> Option<(Option<i32>, Option<String>)> {
+        let mut child = self.child.lock().await;
+        match child.try_wait() {
+            Ok(Some(status)) => {
+                let exit_code = status.code();
+                drop(child);
+                let stderr = self.stderr_tail_summary().await;
+                Some((exit_code, stderr))
+            }
+            Ok(None) => None,
+            Err(_) => None,
+        }
+    }
+
+    async fn stderr_tail_summary(&self) -> Option<String> {
+        let tail = self.stderr_tail.lock().await;
+        if tail.is_empty() {
+            return None;
+        }
+        Some(tail.iter().cloned().collect::<Vec<_>>().join("\n"))
+    }
 }

 fn id_key(value: &Value) -> String {