host daemon touches (#4)

* feat: launch config tests

* feat: readiness probe port alignment
This commit is contained in:
Hari 2026-04-08 12:56:07 -04:00 committed by GitHub
parent e2f9e54970
commit 592df1e1df
10 changed files with 439 additions and 49 deletions

View file

@ -77,6 +77,12 @@ func (d *Daemon) CreateMachine(ctx context.Context, req contracthost.CreateMachi
return nil, err return nil, err
} }
ports := defaultMachinePorts()
if err := waitForGuestReady(ctx, state.RuntimeHost, ports); err != nil {
_ = d.runtime.Delete(context.Background(), *state)
return nil, err
}
now := time.Now().UTC() now := time.Now().UTC()
systemVolumeRecord := model.VolumeRecord{ systemVolumeRecord := model.VolumeRecord{
ID: d.systemVolumeID(req.MachineID), ID: d.systemVolumeID(req.MachineID),
@ -117,7 +123,7 @@ func (d *Daemon) CreateMachine(ctx context.Context, req contracthost.CreateMachi
UserVolumeIDs: append([]contracthost.VolumeID(nil), attachedUserVolumeIDs...), UserVolumeIDs: append([]contracthost.VolumeID(nil), attachedUserVolumeIDs...),
RuntimeHost: state.RuntimeHost, RuntimeHost: state.RuntimeHost,
TapDevice: state.TapName, TapDevice: state.TapName,
Ports: defaultMachinePorts(), Ports: ports,
Phase: contracthost.MachinePhaseRunning, Phase: contracthost.MachinePhaseRunning,
PID: state.PID, PID: state.PID,
SocketPath: state.SocketPath, SocketPath: state.SocketPath,

View file

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"os" "os"
"sync" "sync"
"time"
appconfig "github.com/getcompanion-ai/computer-host/internal/config" appconfig "github.com/getcompanion-ai/computer-host/internal/config"
"github.com/getcompanion-ai/computer-host/internal/firecracker" "github.com/getcompanion-ai/computer-host/internal/firecracker"
@ -19,6 +20,9 @@ const (
defaultSSHPort = uint16(2222) defaultSSHPort = uint16(2222)
defaultVNCPort = uint16(6080) defaultVNCPort = uint16(6080)
defaultCopyBufferSize = 1024 * 1024 defaultCopyBufferSize = 1024 * 1024
defaultGuestDialTimeout = 500 * time.Millisecond
defaultGuestReadyPollInterval = 100 * time.Millisecond
defaultGuestReadyTimeout = 30 * time.Second
) )
type Runtime interface { type Runtime interface {

View file

@ -2,10 +2,14 @@ package daemon
import ( import (
"context" "context"
"errors"
"net"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
"path/filepath" "path/filepath"
"strconv"
"strings"
"testing" "testing"
"time" "time"
@ -40,8 +44,6 @@ func (f *fakeRuntime) Delete(_ context.Context, state firecracker.MachineState)
} }
func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) { func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
t.Parallel()
root := t.TempDir() root := t.TempDir()
cfg := testConfig(root) cfg := testConfig(root)
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath) fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
@ -49,13 +51,18 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
t.Fatalf("create file store: %v", err) t.Fatalf("create file store: %v", err)
} }
sshListener := listenTestPort(t, int(defaultSSHPort))
defer sshListener.Close()
vncListener := listenTestPort(t, int(defaultVNCPort))
defer vncListener.Close()
startedAt := time.Unix(1700000005, 0).UTC() startedAt := time.Unix(1700000005, 0).UTC()
runtime := &fakeRuntime{ runtime := &fakeRuntime{
bootState: firecracker.MachineState{ bootState: firecracker.MachineState{
ID: "vm-1", ID: "vm-1",
Phase: firecracker.PhaseRunning, Phase: firecracker.PhaseRunning,
PID: 4321, PID: 4321,
RuntimeHost: "172.16.0.2", RuntimeHost: "127.0.0.1",
SocketPath: filepath.Join(cfg.RuntimeDir, "machines", "vm-1", "root", "run", "firecracker.sock"), SocketPath: filepath.Join(cfg.RuntimeDir, "machines", "vm-1", "root", "run", "firecracker.sock"),
TapName: "fctap0", TapName: "fctap0",
StartedAt: &startedAt, StartedAt: &startedAt,
@ -95,12 +102,15 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
if response.Machine.Phase != contracthost.MachinePhaseRunning { if response.Machine.Phase != contracthost.MachinePhaseRunning {
t.Fatalf("machine phase mismatch: got %q", response.Machine.Phase) t.Fatalf("machine phase mismatch: got %q", response.Machine.Phase)
} }
if response.Machine.RuntimeHost != "172.16.0.2" { if response.Machine.RuntimeHost != "127.0.0.1" {
t.Fatalf("runtime host mismatch: got %q", response.Machine.RuntimeHost) t.Fatalf("runtime host mismatch: got %q", response.Machine.RuntimeHost)
} }
if len(response.Machine.Ports) != 2 { if len(response.Machine.Ports) != 2 {
t.Fatalf("machine ports mismatch: got %d want 2", len(response.Machine.Ports)) t.Fatalf("machine ports mismatch: got %d want 2", len(response.Machine.Ports))
} }
if response.Machine.Ports[0].Port != defaultSSHPort || response.Machine.Ports[1].Port != defaultVNCPort {
t.Fatalf("machine ports mismatch: got %#v", response.Machine.Ports)
}
if runtime.bootCalls != 1 { if runtime.bootCalls != 1 {
t.Fatalf("boot call count mismatch: got %d want 1", runtime.bootCalls) t.Fatalf("boot call count mismatch: got %d want 1", runtime.bootCalls)
} }
@ -209,3 +219,27 @@ func testConfig(root string) appconfig.Config {
JailerBinaryPath: "/usr/bin/jailer", JailerBinaryPath: "/usr/bin/jailer",
} }
} }
// listenTestPort binds a TCP listener on 127.0.0.1:port and starts a
// background goroutine that accepts every incoming connection and closes it
// immediately. The goroutine exits once Accept fails, which happens when the
// returned listener is closed by the caller.
//
// If the bind fails with an "address already in use" error the test is
// skipped; any other bind failure fails the test.
func listenTestPort(t *testing.T, port int) net.Listener {
	t.Helper()

	address := net.JoinHostPort("127.0.0.1", strconv.Itoa(port))
	ln, listenErr := net.Listen("tcp", address)
	if listenErr != nil {
		var opErr *net.OpError
		message := strings.ToLower(listenErr.Error())
		portTaken := errors.As(listenErr, &opErr) && strings.Contains(message, "address already in use")
		if portTaken {
			t.Skipf("port %d already in use", port)
		}
		t.Fatalf("listen on port %d: %v", port, listenErr)
	}

	// Accept-and-drop loop: a successful connect is all a dialer needs to see.
	go func() {
		for {
			conn, acceptErr := ln.Accept()
			if acceptErr != nil {
				return
			}
			_ = conn.Close()
		}
	}()

	return ln
}

View file

@ -0,0 +1,52 @@
package daemon
import (
"context"
"fmt"
"net"
"strconv"
"strings"
"time"
contracthost "github.com/getcompanion-ai/computer-host/contract"
)
// waitForGuestReady blocks until every port in ports accepts a connection on
// host, checking the ports one after another in slice order.
//
// The host is whitespace-trimmed first and an empty result is rejected. All
// ports share a single deadline of defaultGuestReadyTimeout layered on top of
// ctx; the first port that fails to become reachable ends the wait and its
// error is returned unchanged.
func waitForGuestReady(ctx context.Context, host string, ports []contracthost.MachinePort) error {
	trimmedHost := strings.TrimSpace(host)
	if len(trimmedHost) == 0 {
		return fmt.Errorf("guest runtime host is required")
	}

	deadlineCtx, release := context.WithTimeout(ctx, defaultGuestReadyTimeout)
	defer release()

	for i := range ports {
		if err := waitForGuestPort(deadlineCtx, trimmedHost, ports[i]); err != nil {
			return err
		}
	}
	return nil
}
// waitForGuestPort repeatedly dials host on the given port until a connection
// succeeds or ctx is done.
//
// Each dial attempt is bounded by defaultGuestDialTimeout and attempts are
// paced by a ticker firing every defaultGuestReadyPollInterval. A successful
// connection is closed straight away and nil is returned. When ctx ends
// first, the returned error wraps ctx.Err() and includes the most recent dial
// error for diagnosis.
func waitForGuestPort(ctx context.Context, host string, port contracthost.MachinePort) error {
	var (
		target  = net.JoinHostPort(host, strconv.Itoa(int(port.Port)))
		network = string(port.Protocol)
		dialer  = net.Dialer{Timeout: defaultGuestDialTimeout}
	)

	poll := time.NewTicker(defaultGuestReadyPollInterval)
	defer poll.Stop()

	for {
		conn, dialErr := dialer.DialContext(ctx, network, target)
		if dialErr == nil {
			_ = conn.Close()
			return nil
		}

		// Wait for the next poll tick, or give up if the context has ended.
		select {
		case <-poll.C:
			continue
		case <-ctx.Done():
			return fmt.Errorf("wait for guest port %q on %s: %w (last_err=%v)", port.Name, target, ctx.Err(), dialErr)
		}
	}
}

View file

@ -33,6 +33,8 @@ type driveRequest struct {
PathOnHost string `json:"path_on_host"` PathOnHost string `json:"path_on_host"`
} }
// entropyRequest is the field-less request body that PutEntropy sends to the
// "/entropy" endpoint.
type entropyRequest struct{}
type faultResponse struct { type faultResponse struct {
FaultMessage string `json:"fault_message"` FaultMessage string `json:"fault_message"`
} }
@ -56,6 +58,10 @@ type networkInterfaceRequest struct {
IfaceID string `json:"iface_id"` IfaceID string `json:"iface_id"`
} }
// serialRequest is the JSON request body that PutSerial sends to the
// "/serial" endpoint.
type serialRequest struct {
// SerialOutPath is encoded as "serial_out_path".
// NOTE(review): presumably the file that receives guest serial output, as
// seen from inside the jail — confirm against the Firecracker API.
SerialOutPath string `json:"serial_out_path"`
}
type vsockRequest struct { type vsockRequest struct {
GuestCID int64 `json:"guest_cid"` GuestCID int64 `json:"guest_cid"`
UDSPath string `json:"uds_path"` UDSPath string `json:"uds_path"`
@ -98,6 +104,10 @@ func (c *apiClient) PutDrive(ctx context.Context, drive driveRequest) error {
return c.do(ctx, http.MethodPut, endpoint, drive, nil, http.StatusNoContent) return c.do(ctx, http.MethodPut, endpoint, drive, nil, http.StatusNoContent)
} }
// PutEntropy issues a PUT to "/entropy" with an empty entropyRequest body and
// expects a 204 No Content response.
func (c *apiClient) PutEntropy(ctx context.Context) error {
	var body entropyRequest
	return c.do(ctx, http.MethodPut, "/entropy", body, nil, http.StatusNoContent)
}
func (c *apiClient) PutMachineConfig(ctx context.Context, spec MachineSpec) error { func (c *apiClient) PutMachineConfig(ctx context.Context, spec MachineSpec) error {
body := machineConfigRequest{ body := machineConfigRequest{
MemSizeMib: spec.MemoryMiB, MemSizeMib: spec.MemoryMiB,
@ -117,6 +127,17 @@ func (c *apiClient) PutNetworkInterface(ctx context.Context, network NetworkAllo
return c.do(ctx, http.MethodPut, endpoint, body, nil, http.StatusNoContent) return c.do(ctx, http.MethodPut, endpoint, body, nil, http.StatusNoContent)
} }
// PutSerial issues a PUT to "/serial" carrying serialOutPath as the
// "serial_out_path" field and expects a 204 No Content response.
func (c *apiClient) PutSerial(ctx context.Context, serialOutPath string) error {
	body := serialRequest{SerialOutPath: serialOutPath}
	return c.do(ctx, http.MethodPut, "/serial", body, nil, http.StatusNoContent)
}
func (c *apiClient) PutVsock(ctx context.Context, spec VsockSpec) error { func (c *apiClient) PutVsock(ctx context.Context, spec VsockSpec) error {
body := vsockRequest{ body := vsockRequest{
GuestCID: int64(spec.CID), GuestCID: int64(spec.CID),

View file

@ -0,0 +1,104 @@
package firecracker
import (
"context"
"io"
"net"
"net/http"
"path/filepath"
"testing"
)
// capturedRequest records one HTTP request observed by the test server: its
// method, its URL path, and the raw request body as a string.
type capturedRequest struct {
	Method, Path, Body string
}
// TestConfigureMachineEnablesEntropyAndSerialBeforeStart runs configureMachine
// against a fake API server on a unix socket and verifies the exact sequence
// of request paths, ending with "/entropy", "/serial" and then "/actions", as
// well as the JSON bodies of the entropy and serial requests.
func TestConfigureMachineEnablesEntropyAndSerialBeforeStart(t *testing.T) {
	// The handler runs on the HTTP server's goroutines, so observed requests
	// are handed to the test goroutine through a buffered channel instead of
	// appending to a slice shared without synchronization.
	captured := make(chan capturedRequest, 32)
	socketPath, shutdown := startUnixSocketServer(t, func(w http.ResponseWriter, r *http.Request) {
		body, err := io.ReadAll(r.Body)
		if err != nil {
			// t.Fatalf may only be called from the test goroutine; record the
			// failure and answer with an error status instead.
			t.Errorf("read request body: %v", err)
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		request := capturedRequest{
			Method: r.Method,
			Path:   r.URL.Path,
			Body:   string(body),
		}
		select {
		case captured <- request:
		default:
			// Never block the server if far more requests arrive than expected.
			t.Errorf("capture buffer full, dropping %s %s", r.Method, r.URL.Path)
		}
		w.WriteHeader(http.StatusNoContent)
	})
	defer shutdown()

	client := newAPIClient(socketPath)
	spec := MachineSpec{
		ID:              "vm-1",
		VCPUs:           1,
		MemoryMiB:       512,
		KernelImagePath: "/kernel",
		RootFSPath:      "/rootfs",
	}
	paths := machinePaths{
		JailedSerialLogPath: "/logs/serial.log",
	}
	network := NetworkAllocation{
		InterfaceID: defaultInterfaceID,
		TapName:     "fctap0",
		GuestMAC:    "06:00:ac:10:00:02",
	}

	if err := configureMachine(context.Background(), client, paths, spec, network); err != nil {
		t.Fatalf("configure machine: %v", err)
	}

	// Every request is captured before its response is written, so once
	// configureMachine has returned all of them are already buffered. This
	// goroutine is the only receiver, so draining by length cannot block.
	requests := make([]capturedRequest, 0, len(captured))
	for len(captured) > 0 {
		requests = append(requests, <-captured)
	}

	gotPaths := make([]string, 0, len(requests))
	for _, request := range requests {
		gotPaths = append(gotPaths, request.Path)
	}
	wantPaths := []string{
		"/machine-config",
		"/boot-source",
		"/drives/root_drive",
		"/network-interfaces/net0",
		"/entropy",
		"/serial",
		"/actions",
	}
	if len(gotPaths) != len(wantPaths) {
		t.Fatalf("request count mismatch: got %d want %d (%v)", len(gotPaths), len(wantPaths), gotPaths)
	}
	for i := range wantPaths {
		if gotPaths[i] != wantPaths[i] {
			t.Fatalf("request %d mismatch: got %q want %q", i, gotPaths[i], wantPaths[i])
		}
	}
	// Indexes 4 and 5 are the entropy and serial requests per wantPaths above.
	if requests[4].Body != "{}" {
		t.Fatalf("entropy body mismatch: got %q", requests[4].Body)
	}
	if requests[5].Body != "{\"serial_out_path\":\"/logs/serial.log\"}" {
		t.Fatalf("serial body mismatch: got %q", requests[5].Body)
	}
}
// startUnixSocketServer serves handler over HTTP on a unix socket named
// "firecracker.sock" inside a fresh temporary directory. It returns the
// socket path together with a function that shuts the server down and closes
// the listener. A failure to listen fails the test immediately.
func startUnixSocketServer(t *testing.T, handler http.HandlerFunc) (string, func()) {
	t.Helper()

	socketPath := filepath.Join(t.TempDir(), "firecracker.sock")
	ln, listenErr := net.Listen("unix", socketPath)
	if listenErr != nil {
		t.Fatalf("listen on unix socket: %v", listenErr)
	}

	srv := new(http.Server)
	srv.Handler = handler
	go func() {
		// Serve returns once the server is shut down; its error is not needed.
		_ = srv.Serve(ln)
	}()

	stop := func() {
		_ = srv.Shutdown(context.Background())
		_ = ln.Close()
	}
	return socketPath, stop
}

View file

@ -15,12 +15,13 @@ import (
const ( const (
defaultCgroupVersion = "2" defaultCgroupVersion = "2"
defaultFirecrackerInitTimeout = 10 * time.Second defaultFirecrackerInitTimeout = 10 * time.Second
defaultFirecrackerLogLevel = "Warning"
defaultFirecrackerPollInterval = 10 * time.Millisecond defaultFirecrackerPollInterval = 10 * time.Millisecond
defaultRootDriveID = "root_drive" defaultRootDriveID = "root_drive"
defaultVSockRunDir = "/run" defaultVSockRunDir = "/run"
) )
func configureMachine(ctx context.Context, client *apiClient, spec MachineSpec, network NetworkAllocation) error { func configureMachine(ctx context.Context, client *apiClient, paths machinePaths, spec MachineSpec, network NetworkAllocation) error {
if err := client.PutMachineConfig(ctx, spec); err != nil { if err := client.PutMachineConfig(ctx, spec); err != nil {
return fmt.Errorf("put machine config: %w", err) return fmt.Errorf("put machine config: %w", err)
} }
@ -38,6 +39,12 @@ func configureMachine(ctx context.Context, client *apiClient, spec MachineSpec,
if err := client.PutNetworkInterface(ctx, network); err != nil { if err := client.PutNetworkInterface(ctx, network); err != nil {
return fmt.Errorf("put network interface: %w", err) return fmt.Errorf("put network interface: %w", err)
} }
if err := client.PutEntropy(ctx); err != nil {
return fmt.Errorf("put entropy device: %w", err)
}
if err := client.PutSerial(ctx, paths.JailedSerialLogPath); err != nil {
return fmt.Errorf("put serial device: %w", err)
}
if spec.Vsock != nil { if spec.Vsock != nil {
if err := client.PutVsock(ctx, *spec.Vsock); err != nil { if err := client.PutVsock(ctx, *spec.Vsock); err != nil {
return fmt.Errorf("put vsock: %w", err) return fmt.Errorf("put vsock: %w", err)
@ -58,14 +65,21 @@ func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecracke
"--exec-file", firecrackerBinaryPath, "--exec-file", firecrackerBinaryPath,
"--cgroup-version", defaultCgroupVersion, "--cgroup-version", defaultCgroupVersion,
"--chroot-base-dir", paths.JailerBaseDir, "--chroot-base-dir", paths.JailerBaseDir,
"--daemonize",
"--new-pid-ns",
"--", "--",
"--api-sock", defaultFirecrackerSocketPath, "--api-sock", defaultFirecrackerSocketPath,
"--log-path", paths.JailedFirecrackerLogPath,
"--level", defaultFirecrackerLogLevel,
"--show-level",
"--show-log-origin",
) )
command.Stdout = os.Stderr
command.Stderr = os.Stderr
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
return nil, fmt.Errorf("start jailer: %w", err) return nil, fmt.Errorf("start jailer: %w", err)
} }
go func() {
_ = command.Wait()
}()
return command, nil return command, nil
} }
@ -171,7 +185,50 @@ func cleanupStartedProcess(command *exec.Cmd) {
return return
} }
_ = command.Process.Kill() _ = command.Process.Kill()
_ = command.Wait() }
// readPIDFile loads pidFilePath and parses its whitespace-trimmed content as
// a decimal process ID.
//
// A read failure is returned unwrapped so callers can still classify it (for
// example with os.IsNotExist). Content that is not an integer, or an integer
// below 1, yields a descriptive error naming the file.
func readPIDFile(pidFilePath string) (int, error) {
	raw, readErr := os.ReadFile(pidFilePath)
	if readErr != nil {
		return 0, readErr
	}

	text := strings.TrimSpace(string(raw))
	value, parseErr := strconv.Atoi(text)
	switch {
	case parseErr != nil:
		return 0, fmt.Errorf("parse pid file %q: %w", pidFilePath, parseErr)
	case value < 1:
		return 0, fmt.Errorf("pid file %q must contain a positive pid", pidFilePath)
	}
	return value, nil
}
func waitForPIDFile(ctx context.Context, pidFilePath string) (int, error) {
waitContext, cancel := context.WithTimeout(ctx, defaultFirecrackerInitTimeout)
defer cancel()
ticker := time.NewTicker(defaultFirecrackerPollInterval)
defer ticker.Stop()
var lastErr error
for {
select {
case <-waitContext.Done():
if lastErr != nil {
return 0, fmt.Errorf("%w (pid_file=%q last_err=%v)", waitContext.Err(), pidFilePath, lastErr)
}
return 0, fmt.Errorf("%w (pid_file=%q)", waitContext.Err(), pidFilePath)
case <-ticker.C:
pid, err := readPIDFile(pidFilePath)
if err == nil {
return pid, nil
}
lastErr = err
if os.IsNotExist(err) {
continue
}
return 0, err
}
}
} }
func hostVSockPath(paths machinePaths, spec MachineSpec) string { func hostVSockPath(paths machinePaths, spec MachineSpec) string {

View file

@ -0,0 +1,95 @@
package firecracker
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// TestLaunchJailedFirecrackerPassesDaemonAndLoggingFlags substitutes the
// jailer with a shell script that writes each of its arguments on its own
// line to a file, launches it through launchJailedFirecracker, and checks
// that the daemonize, pid-namespace and logging arguments were all passed.
func TestLaunchJailedFirecrackerPassesDaemonAndLoggingFlags(t *testing.T) {
	workDir := t.TempDir()
	recordedArgsPath := filepath.Join(workDir, "args.txt")
	fakeJailerPath := filepath.Join(workDir, "fake-jailer.sh")

	// The fake jailer dumps "$@", one argument per line, into recordedArgsPath.
	fakeJailer := "#!/bin/sh\nprintf '%s\n' \"$@\" > " + shellQuote(recordedArgsPath) + "\n"
	if writeErr := os.WriteFile(fakeJailerPath, []byte(fakeJailer), 0o755); writeErr != nil {
		t.Fatalf("write fake jailer: %v", writeErr)
	}

	paths, err := buildMachinePaths(workDir, "vm-1", "/usr/bin/firecracker")
	if err != nil {
		t.Fatalf("build machine paths: %v", err)
	}
	err = os.MkdirAll(paths.LogDir, 0o755)
	if err != nil {
		t.Fatalf("create log dir: %v", err)
	}

	_, err = launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", fakeJailerPath)
	if err != nil {
		t.Fatalf("launch jailed firecracker: %v", err)
	}

	recorded := waitForFileContents(t, recordedArgsPath)
	expected := []string{
		"--daemonize",
		"--new-pid-ns",
		"--log-path",
		paths.JailedFirecrackerLogPath,
		"--show-level",
		"--show-log-origin",
	}
	for i := range expected {
		if containsLine(recorded, expected[i]) {
			continue
		}
		t.Fatalf("missing launch argument %q in %v", expected[i], recorded)
	}
}
// TestWaitForPIDFileReadsPID writes a PID file up front and checks that
// waitForPIDFile returns the PID stored in it.
func TestWaitForPIDFileReadsPID(t *testing.T) {
	const wantPID = 4321

	pidFilePath := filepath.Join(t.TempDir(), "firecracker.pid")
	writeErr := os.WriteFile(pidFilePath, []byte("4321\n"), 0o644)
	if writeErr != nil {
		t.Fatalf("write pid file: %v", writeErr)
	}

	gotPID, waitErr := waitForPIDFile(context.Background(), pidFilePath)
	switch {
	case waitErr != nil:
		t.Fatalf("wait for pid file: %v", waitErr)
	case gotPID != wantPID:
		t.Fatalf("pid mismatch: got %d want %d", gotPID, wantPID)
	}
}
// waitForFileContents polls path every 10ms for up to 2 seconds and returns
// its whitespace-trimmed content split into lines.
//
// A file that does not exist yet, or exists but is still empty, is treated as
// "not ready": a writer that creates the file before writing to it (such as a
// shell redirect) would otherwise make this helper return a single empty line.
// The test fails if no content shows up before the timeout.
func waitForFileContents(t *testing.T, path string) []string {
	t.Helper()

	timeout := time.NewTimer(2 * time.Second)
	defer timeout.Stop()
	ticker := time.NewTicker(10 * time.Millisecond)
	defer ticker.Stop()

	for {
		payload, err := os.ReadFile(path)
		if err == nil {
			// Only accept the file once the writer has put something in it.
			if content := strings.TrimSpace(string(payload)); content != "" {
				return strings.Split(content, "\n")
			}
		}
		select {
		case <-timeout.C:
			t.Fatalf("timed out waiting for %q", path)
		case <-ticker.C:
		}
	}
}
// containsLine reports whether any element of lines is exactly equal to want.
func containsLine(lines []string, want string) bool {
	for i := 0; i < len(lines); i++ {
		if lines[i] != want {
			continue
		}
		return true
	}
	return false
}
// shellQuote wraps value in single quotes for use in a POSIX shell command,
// replacing each embedded single quote with the sequence '"'"' (close the
// quoted string, emit a double-quoted single quote, reopen the quoted string).
func shellQuote(value string) string {
	const quote = "'"
	escaped := strings.ReplaceAll(value, quote, "'\"'\"'")
	return quote + escaped + quote
}

View file

@ -2,6 +2,7 @@ package firecracker
import ( import (
"fmt" "fmt"
"path"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
@ -9,7 +10,10 @@ import (
const ( const (
defaultChrootRootDirName = "root" defaultChrootRootDirName = "root"
defaultLogDirName = "logs"
defaultSerialLogName = "serial.log"
defaultFirecrackerSocketDir = "run" defaultFirecrackerSocketDir = "run"
defaultFirecrackerLogName = "firecracker.log"
defaultFirecrackerSocketName = "firecracker.socket" defaultFirecrackerSocketName = "firecracker.socket"
defaultFirecrackerSocketPath = "/run/firecracker.socket" defaultFirecrackerSocketPath = "/run/firecracker.socket"
) )
@ -18,6 +22,12 @@ type machinePaths struct {
BaseDir string BaseDir string
ChrootRootDir string ChrootRootDir string
JailerBaseDir string JailerBaseDir string
LogDir string
FirecrackerLogPath string
JailedFirecrackerLogPath string
SerialLogPath string
JailedSerialLogPath string
PIDFilePath string
SocketPath string SocketPath string
} }
@ -38,11 +48,18 @@ func buildMachinePaths(rootDir string, id MachineID, firecrackerBinaryPath strin
baseDir := filepath.Join(rootDir, "machines", string(id)) baseDir := filepath.Join(rootDir, "machines", string(id))
jailerBaseDir := filepath.Join(baseDir, "jailer") jailerBaseDir := filepath.Join(baseDir, "jailer")
chrootRootDir := filepath.Join(jailerBaseDir, binName, string(id), defaultChrootRootDirName) chrootRootDir := filepath.Join(jailerBaseDir, binName, string(id), defaultChrootRootDirName)
logDir := filepath.Join(chrootRootDir, defaultLogDirName)
return machinePaths{ return machinePaths{
BaseDir: baseDir, BaseDir: baseDir,
ChrootRootDir: chrootRootDir, ChrootRootDir: chrootRootDir,
JailerBaseDir: jailerBaseDir, JailerBaseDir: jailerBaseDir,
LogDir: logDir,
FirecrackerLogPath: filepath.Join(logDir, defaultFirecrackerLogName),
JailedFirecrackerLogPath: path.Join("/", defaultLogDirName, defaultFirecrackerLogName),
SerialLogPath: filepath.Join(logDir, defaultSerialLogName),
JailedSerialLogPath: path.Join("/", defaultLogDirName, defaultSerialLogName),
PIDFilePath: filepath.Join(chrootRootDir, binName+".pid"),
SocketPath: filepath.Join(chrootRootDir, defaultFirecrackerSocketDir, defaultFirecrackerSocketName), SocketPath: filepath.Join(chrootRootDir, defaultFirecrackerSocketDir, defaultFirecrackerSocketName),
}, nil }, nil
} }

View file

@ -69,11 +69,11 @@ func (r *Runtime) Boot(ctx context.Context, spec MachineSpec, usedNetworks []Net
return nil, err return nil, err
} }
cleanup := func(network NetworkAllocation, paths machinePaths, command *exec.Cmd) { cleanup := func(network NetworkAllocation, paths machinePaths, command *exec.Cmd, firecrackerPID int) {
if preserveFailureArtifacts() { if preserveFailureArtifacts() {
fmt.Fprintf(os.Stderr, "firecracker debug: preserving failure artifacts machine=%s pid=%d socket=%s base=%s\n", spec.ID, pidOf(command), paths.SocketPath, paths.BaseDir)
return return
} }
cleanupRunningProcess(firecrackerPID)
cleanupStartedProcess(command) cleanupStartedProcess(command)
_ = r.networkProvisioner.Remove(context.Background(), network) _ = r.networkProvisioner.Remove(context.Background(), network)
if paths.BaseDir != "" { if paths.BaseDir != "" {
@ -88,55 +88,51 @@ func (r *Runtime) Boot(ctx context.Context, spec MachineSpec, usedNetworks []Net
paths, err := buildMachinePaths(r.rootDir, spec.ID, r.firecrackerBinaryPath) paths, err := buildMachinePaths(r.rootDir, spec.ID, r.firecrackerBinaryPath)
if err != nil { if err != nil {
cleanup(network, machinePaths{}, nil) cleanup(network, machinePaths{}, nil, 0)
return nil, err return nil, err
} }
if err := os.MkdirAll(paths.JailerBaseDir, 0o755); err != nil { if err := os.MkdirAll(paths.LogDir, 0o755); err != nil {
cleanup(network, paths, nil) cleanup(network, paths, nil, 0)
return nil, fmt.Errorf("create machine jailer dir %q: %w", paths.JailerBaseDir, err) return nil, fmt.Errorf("create machine log dir %q: %w", paths.LogDir, err)
} }
if err := r.networkProvisioner.Ensure(ctx, network); err != nil { if err := r.networkProvisioner.Ensure(ctx, network); err != nil {
cleanup(network, paths, nil) cleanup(network, paths, nil, 0)
return nil, err return nil, err
} }
command, err := launchJailedFirecracker(paths, spec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath) command, err := launchJailedFirecracker(paths, spec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath)
if err != nil { if err != nil {
cleanup(network, paths, nil) cleanup(network, paths, nil, 0)
return nil, err return nil, err
} }
socketPath := paths.SocketPath firecrackerPID, err := waitForPIDFile(ctx, paths.PIDFilePath)
if pid := pidOf(command); pid > 0 { if err != nil {
socketPath = procSocketPath(pid) cleanup(network, paths, command, 0)
return nil, fmt.Errorf("wait for firecracker pid: %w", err)
} }
fmt.Fprintf(os.Stderr, "firecracker debug: launched machine=%s pid=%d socket=%s jailer_base=%s\n", spec.ID, pidOf(command), socketPath, paths.JailerBaseDir)
socketPath := procSocketPath(firecrackerPID)
client := newAPIClient(socketPath) client := newAPIClient(socketPath)
if err := waitForSocket(ctx, client, socketPath); err != nil { if err := waitForSocket(ctx, client, socketPath); err != nil {
cleanup(network, paths, command) cleanup(network, paths, command, firecrackerPID)
return nil, fmt.Errorf("wait for firecracker socket: %w", err) return nil, fmt.Errorf("wait for firecracker socket: %w", err)
} }
jailedSpec, err := stageMachineFiles(spec, paths) jailedSpec, err := stageMachineFiles(spec, paths)
if err != nil { if err != nil {
cleanup(network, paths, command) cleanup(network, paths, command, firecrackerPID)
return nil, err return nil, err
} }
if err := configureMachine(ctx, client, jailedSpec, network); err != nil { if err := configureMachine(ctx, client, paths, jailedSpec, network); err != nil {
cleanup(network, paths, command) cleanup(network, paths, command, firecrackerPID)
return nil, err return nil, err
} }
pid := 0
if command.Process != nil {
pid = command.Process.Pid
}
now := time.Now().UTC() now := time.Now().UTC()
state := MachineState{ state := MachineState{
ID: spec.ID, ID: spec.ID,
Phase: PhaseRunning, Phase: PhaseRunning,
PID: pid, PID: firecrackerPID,
RuntimeHost: network.GuestIP().String(), RuntimeHost: network.GuestIP().String(),
SocketPath: socketPath, SocketPath: socketPath,
TapName: network.TapName, TapName: network.TapName,
@ -214,11 +210,15 @@ func processExists(pid int) bool {
return err == nil || err == syscall.EPERM return err == nil || err == syscall.EPERM
} }
func pidOf(command *exec.Cmd) int { func cleanupRunningProcess(pid int) {
if command == nil || command.Process == nil { if pid < 1 {
return 0 return
} }
return command.Process.Pid process, err := os.FindProcess(pid)
if err != nil {
return
}
_ = process.Kill()
} }
func preserveFailureArtifacts() bool { func preserveFailureArtifacts() bool {