From eb9d2a76df5b2c4aeabcfc392400ea4c69ddd47f Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Fri, 10 Apr 2026 04:04:08 +0000 Subject: [PATCH] feat: nvme disk on m6 --- internal/config/config.go | 42 ++++++++++++++++ internal/config/config_test.go | 76 ++++++++++++++++++++++++++++- internal/daemon/create.go | 12 +++-- internal/daemon/daemon.go | 27 +++++++--- internal/daemon/daemon_test.go | 17 +++++++ internal/daemon/files.go | 41 +++++++++++++++- internal/firecracker/launch.go | 11 +++-- internal/firecracker/launch_test.go | 29 ++++++++++- internal/firecracker/runtime.go | 7 ++- 9 files changed, 240 insertions(+), 22 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 8059457..510d28e 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "github.com/getcompanion-ai/computer-host/internal/firecracker" @@ -21,6 +22,8 @@ type Config struct { SnapshotsDir string RuntimeDir string DiskCloneMode DiskCloneMode + DriveIOEngine firecracker.DriveIOEngine + EnablePCI bool SocketPath string HTTPAddr string EgressInterface string @@ -42,6 +45,10 @@ const ( // Load loads and validates the firecracker-host daemon configuration from the environment. func Load() (Config, error) { rootDir := filepath.Clean(strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_ROOT_DIR"))) + enablePCI, err := loadBool("FIRECRACKER_HOST_ENABLE_PCI") + if err != nil { + return Config{}, err + } cfg := Config{ RootDir: rootDir, StatePath: filepath.Join(rootDir, "state", "state.json"), @@ -51,6 +58,8 @@ func Load() (Config, error) { SnapshotsDir: filepath.Join(rootDir, "snapshots"), RuntimeDir: filepath.Join(rootDir, "runtime"), DiskCloneMode: loadDiskCloneMode(os.Getenv("FIRECRACKER_HOST_DISK_CLONE_MODE")), + DriveIOEngine: loadDriveIOEngine(os.Getenv("FIRECRACKER_HOST_DRIVE_IO_ENGINE")), + EnablePCI: enablePCI, SocketPath: filepath.Join(rootDir, defaultSocketName), HTTPAddr: strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_HTTP_ADDR")), EgressInterface: strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_EGRESS_INTERFACE")), @@ -96,6 +105,9 @@ func (c Config) Validate() error { if err := c.DiskCloneMode.Validate(); err != nil { return err } + if err := validateDriveIOEngine(c.DriveIOEngine); err != nil { + return err + } if strings.TrimSpace(c.SocketPath) == "" { return fmt.Errorf("socket path is required") } @@ -112,6 +124,7 @@ func (c Config) FirecrackerRuntimeConfig() firecracker.RuntimeConfig { EgressInterface: c.EgressInterface, FirecrackerBinaryPath: c.FirecrackerBinaryPath, JailerBinaryPath: c.JailerBinaryPath, + EnablePCI: c.EnablePCI, } } @@ -132,3 +145,32 @@ func (m DiskCloneMode) Validate() error { return fmt.Errorf("FIRECRACKER_HOST_DISK_CLONE_MODE must be %q or %q", DiskCloneModeReflink, DiskCloneModeCopy) } } + +func loadDriveIOEngine(raw string) firecracker.DriveIOEngine { + value := strings.TrimSpace(raw) + if value == "" { + return firecracker.DriveIOEngineSync + } + return firecracker.DriveIOEngine(value) +} + +func validateDriveIOEngine(engine firecracker.DriveIOEngine) error { + switch engine { + case firecracker.DriveIOEngineSync, firecracker.DriveIOEngineAsync: + return nil + default: + return fmt.Errorf("FIRECRACKER_HOST_DRIVE_IO_ENGINE must be %q or %q", firecracker.DriveIOEngineSync, firecracker.DriveIOEngineAsync) + } +} + +func loadBool(name string) (bool, error) { + value := strings.TrimSpace(os.Getenv(name)) + if value == "" { + return false, nil + } + parsed, err := strconv.ParseBool(value) + if err != nil { + return false, fmt.Errorf("%s must be a boolean: %w", name, err) + } + return parsed, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 3c15434..28bf173 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1,6 +1,10 @@ package config -import "testing" +import ( + "testing" + + "github.com/getcompanion-ai/computer-host/internal/firecracker" +) func TestLoadDiskCloneModeDefaultsToReflink(t *testing.T) { t.Parallel() @@ -38,3 +42,73 @@ func TestDiskCloneModeValidate(t *testing.T) { }) } } + +func TestLoadDriveIOEngineDefaultsToSync(t *testing.T) { + t.Parallel() + + if got := loadDriveIOEngine(""); got != firecracker.DriveIOEngineSync { + t.Fatalf("drive io engine = %q, want %q", got, firecracker.DriveIOEngineSync) + } +} + +func TestValidateDriveIOEngine(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + engine firecracker.DriveIOEngine + wantErr bool + }{ + {name: "sync", engine: firecracker.DriveIOEngineSync}, + {name: "async", engine: firecracker.DriveIOEngineAsync}, + {name: "empty", engine: "", wantErr: true}, + {name: "unknown", engine: "Aio", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + err := validateDriveIOEngine(tt.engine) + if tt.wantErr && err == nil { + t.Fatal("validateDriveIOEngine() error = nil, want error") + } + if !tt.wantErr && err != nil { + t.Fatalf("validateDriveIOEngine() error = %v, want nil", err) + } + }) + } +} + +func TestLoadBool(t *testing.T) { + tests := []struct { + name string + value string + want bool + wantErr bool + }{ + {name: "default"}, + {name: "true", value: "true", want: true}, + {name: "false", value: "false"}, + {name: "one", value: "1", want: true}, + {name: "invalid", value: "enabled", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + envName := "TEST_FIRECRACKER_BOOL" + t.Setenv(envName, tt.value) + + got, err := loadBool(envName) + if tt.wantErr && err == nil { + t.Fatal("loadBool() error = nil, want error") + } + if !tt.wantErr && err != nil { + t.Fatalf("loadBool() error = %v, want nil", err) + } + if got != tt.want { + t.Fatalf("loadBool() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/daemon/create.go b/internal/daemon/create.go index 26fa435..297dcee 100644 --- a/internal/daemon/create.go +++ b/internal/daemon/create.go @@ -159,9 +159,11 @@ func (d *Daemon) buildMachineSpec(machineID contracthost.MachineID, artifact *mo drives := make([]firecracker.DriveSpec, 0, len(userVolumes)) for i, volume := range userVolumes { drives = append(drives, firecracker.DriveSpec{ - ID: fmt.Sprintf("user-%d", i), - Path: volume.Path, - ReadOnly: false, + ID: fmt.Sprintf("user-%d", i), + Path: volume.Path, + ReadOnly: false, + CacheType: firecracker.DriveCacheTypeUnsafe, + IOEngine: d.config.DriveIOEngine, }) } @@ -179,9 +181,9 @@ func (d *Daemon) buildMachineSpec(machineID contracthost.MachineID, artifact *mo ID: "root_drive", Path: systemVolumePath, CacheType: firecracker.DriveCacheTypeUnsafe, - IOEngine: firecracker.DriveIOEngineSync, + IOEngine: d.config.DriveIOEngine, }, - KernelArgs: defaultGuestKernelArgs, + KernelArgs: guestKernelArgs(d.config.EnablePCI), Drives: drives, MMDS: mmds, Vsock: guestVsockSpec(machineID), diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index a6ad5eb..bcd8192 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -8,21 +8,22 @@ import ( "sync" "time" - contracthost "github.com/getcompanion-ai/computer-host/contract" appconfig "github.com/getcompanion-ai/computer-host/internal/config" "github.com/getcompanion-ai/computer-host/internal/firecracker" "github.com/getcompanion-ai/computer-host/internal/model" "github.com/getcompanion-ai/computer-host/internal/store" + contracthost "github.com/getcompanion-ai/computer-host/contract" ) const ( - defaultGuestKernelArgs = "console=ttyS0 reboot=k panic=1 pci=off" - defaultGuestMemoryMiB = int64(3072) - defaultGuestVCPUs = int64(2) - defaultSSHPort = uint16(2222) - defaultVNCPort = uint16(6080) - defaultCopyBufferSize = 1024 * 1024 - defaultGuestDialTimeout = 500 * time.Millisecond + defaultGuestKernelArgs = "console=ttyS0 reboot=k panic=1" + defaultGuestKernelArgsNoPCI = defaultGuestKernelArgs + " pci=off" + defaultGuestMemoryMiB = int64(3072) + defaultGuestVCPUs = int64(2) + defaultSSHPort = uint16(2222) + defaultVNCPort = uint16(6080) + defaultCopyBufferSize = 1024 * 1024 + defaultGuestDialTimeout = 500 * time.Millisecond ) type Runtime interface { @@ -73,6 +74,9 @@ func New(cfg appconfig.Config, store store.Store, runtime Runtime) (*Daemon, err return nil, fmt.Errorf("create daemon dir %q: %w", dir, err) } } + if err := validateDiskCloneBackend(cfg); err != nil { + return nil, err + } daemon := &Daemon{ config: cfg, store: store, @@ -127,3 +131,10 @@ func (d *Daemon) lockArtifact(key string) func() { lock.Lock() return lock.Unlock } + +func guestKernelArgs(enablePCI bool) string { + if enablePCI { + return defaultGuestKernelArgs + } + return defaultGuestKernelArgsNoPCI +} diff --git a/internal/daemon/daemon_test.go b/internal/daemon/daemon_test.go index 1b41904..c0eb297 100644 --- a/internal/daemon/daemon_test.go +++ b/internal/daemon/daemon_test.go @@ -758,6 +758,7 @@ func testConfig(root string) appconfig.Config { SnapshotsDir: filepath.Join(root, "snapshots"), RuntimeDir: filepath.Join(root, "runtime"), DiskCloneMode: appconfig.DiskCloneModeCopy, + DriveIOEngine: firecracker.DriveIOEngineSync, SocketPath: filepath.Join(root, "firecracker-host.sock"), EgressInterface: "eth0", FirecrackerBinaryPath: "/usr/bin/firecracker", @@ -765,6 +766,22 @@ func testConfig(root string) appconfig.Config { } } +func TestGuestKernelArgsDisablesPCIByDefault(t *testing.T) { + t.Parallel() + + if got := guestKernelArgs(false); !strings.Contains(got, "pci=off") { + t.Fatalf("guestKernelArgs(false) = %q, want pci=off", got) + } +} + +func TestGuestKernelArgsRemovesPCIOffWhenPCIEnabled(t *testing.T) { + t.Parallel() + + if got := guestKernelArgs(true); strings.Contains(got, "pci=off") { + t.Fatalf("guestKernelArgs(true) = %q, want no pci=off", got) + } +} + func stubGuestSSHPublicKeyReader(hostDaemon *Daemon) { hostDaemon.readGuestSSHPublicKey = func(context.Context, string) (string, error) { return "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIO0j1AyW0mQm9a1G2rY0R4fP2G5+4Qx2V3FJ9P2mA6N3", nil diff --git a/internal/daemon/files.go b/internal/daemon/files.go index 9d56182..5e0b442 100644 --- a/internal/daemon/files.go +++ b/internal/daemon/files.go @@ -97,7 +97,10 @@ func cloneFile(source string, target string) error { func cloneDiskFile(source string, target string, mode appconfig.DiskCloneMode) error { switch mode { case appconfig.DiskCloneModeReflink: - return reflinkFile(source, target) + if err := reflinkFile(source, target); err != nil { + return reflinkRequiredError(err) + } + return nil case appconfig.DiskCloneModeCopy: return cloneFile(source, target) default: @@ -105,6 +108,42 @@ func cloneDiskFile(source string, target string, mode appconfig.DiskCloneMode) e } } +func validateDiskCloneBackend(cfg appconfig.Config) error { + if cfg.DiskCloneMode != appconfig.DiskCloneModeReflink { + return nil + } + + sourceFile, err := os.CreateTemp(cfg.ArtifactsDir, ".reflink-probe-source-*") + if err != nil { + return fmt.Errorf("create reflink probe source in %q: %w", cfg.ArtifactsDir, err) + } + sourcePath := sourceFile.Name() + defer func() { + _ = os.Remove(sourcePath) + }() + + if _, err := sourceFile.WriteString("reflink-probe"); err != nil { + _ = sourceFile.Close() + return fmt.Errorf("write reflink probe source %q: %w", sourcePath, err) + } + if err := sourceFile.Close(); err != nil { + return fmt.Errorf("close reflink probe source %q: %w", sourcePath, err) + } + + targetPath := filepath.Join(cfg.MachineDisksDir, "."+filepath.Base(sourcePath)+".target") + defer func() { + _ = os.Remove(targetPath) + }() + if err := cloneDiskFile(sourcePath, targetPath, cfg.DiskCloneMode); err != nil { + return fmt.Errorf("validate disk clone backend from artifacts dir %q to machine disks dir %q: %w", cfg.ArtifactsDir, cfg.MachineDisksDir, err) + } + return nil +} + +func reflinkRequiredError(err error) error { + return fmt.Errorf("FIRECRACKER_HOST_DISK_CLONE_MODE=reflink requires a CoW filesystem with reflink support across artifacts and machine-disks; mount FIRECRACKER_HOST_ROOT_DIR on XFS with reflink=1 or Btrfs, preferably on local NVMe, or set FIRECRACKER_HOST_DISK_CLONE_MODE=copy for the slow full-copy fallback: %w", err) +} + func reflinkFile(source string, target string) error { if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { return fmt.Errorf("create target dir for %q: %w", target, err) diff --git a/internal/firecracker/launch.go b/internal/firecracker/launch.go index 3615655..2cceddb 100644 --- a/internal/firecracker/launch.go +++ b/internal/firecracker/launch.go @@ -66,9 +66,8 @@ func configureMachine(ctx context.Context, client *apiClient, paths machinePaths return nil } -func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecrackerBinaryPath string, jailerBinaryPath string) (*exec.Cmd, error) { - command := exec.Command( - jailerBinaryPath, +func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecrackerBinaryPath string, jailerBinaryPath string, enablePCI bool) (*exec.Cmd, error) { + args := []string{ "--id", string(machineID), "--uid", strconv.Itoa(os.Getuid()), "--gid", strconv.Itoa(os.Getgid()), @@ -83,7 +82,11 @@ func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecracke "--level", defaultFirecrackerLogLevel, "--show-level", "--show-log-origin", - ) + } + if enablePCI { + args = append(args, "--enable-pci") + } + command := exec.Command(jailerBinaryPath, args...) if err := command.Start(); err != nil { return nil, fmt.Errorf("start jailer: %w", err) } diff --git a/internal/firecracker/launch_test.go b/internal/firecracker/launch_test.go index 6af2bee..76756b3 100644 --- a/internal/firecracker/launch_test.go +++ b/internal/firecracker/launch_test.go @@ -26,7 +26,7 @@ func TestLaunchJailedFirecrackerPassesDaemonAndLoggingFlags(t *testing.T) { t.Fatalf("create log dir: %v", err) } - if _, err := launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", jailerPath); err != nil { + if _, err := launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", jailerPath, false); err != nil { t.Fatalf("launch jailed firecracker: %v", err) } @@ -45,6 +45,33 @@ func TestLaunchJailedFirecrackerPassesDaemonAndLoggingFlags(t *testing.T) { } } +func TestLaunchJailedFirecrackerPassesEnablePCIWhenConfigured(t *testing.T) { + root := t.TempDir() + argsPath := filepath.Join(root, "args.txt") + jailerPath := filepath.Join(root, "fake-jailer.sh") + script := "#!/bin/sh\nprintf '%s\n' \"$@\" > " + shellQuote(argsPath) + "\n" + if err := os.WriteFile(jailerPath, []byte(script), 0o755); err != nil { + t.Fatalf("write fake jailer: %v", err) + } + + paths, err := buildMachinePaths(root, "vm-1", "/usr/bin/firecracker") + if err != nil { + t.Fatalf("build machine paths: %v", err) + } + if err := os.MkdirAll(paths.LogDir, 0o755); err != nil { + t.Fatalf("create log dir: %v", err) + } + + if _, err := launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", jailerPath, true); err != nil { + t.Fatalf("launch jailed firecracker: %v", err) + } + + args := waitForFileContents(t, argsPath) + if !containsLine(args, "--enable-pci") { + t.Fatalf("missing launch argument %q in %v", "--enable-pci", args) + } +} + func TestWaitForPIDFileReadsPID(t *testing.T) { pidFilePath := filepath.Join(t.TempDir(), "firecracker.pid") if err := os.WriteFile(pidFilePath, []byte("4321\n"), 0o644); err != nil { diff --git a/internal/firecracker/runtime.go b/internal/firecracker/runtime.go index ec04de6..d6d5b16 100644 --- a/internal/firecracker/runtime.go +++ b/internal/firecracker/runtime.go @@ -24,12 +24,14 @@ type RuntimeConfig struct { EgressInterface string FirecrackerBinaryPath string JailerBinaryPath string + EnablePCI bool } type Runtime struct { rootDir string firecrackerBinaryPath string jailerBinaryPath string + enablePCI bool networkAllocator *NetworkAllocator networkProvisioner NetworkProvisioner } @@ -69,6 +71,7 @@ func NewRuntime(cfg RuntimeConfig) (*Runtime, error) { rootDir: rootDir, firecrackerBinaryPath: firecrackerBinaryPath, jailerBinaryPath: jailerBinaryPath, + enablePCI: cfg.EnablePCI, networkAllocator: allocator, networkProvisioner: NewIPTapProvisioner(defaultNetworkCIDR, egressInterface), }, nil @@ -110,7 +113,7 @@ func (r *Runtime) Boot(ctx context.Context, spec MachineSpec, usedNetworks []Net return nil, err } - command, err := launchJailedFirecracker(paths, spec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath) + command, err := launchJailedFirecracker(paths, spec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath, r.enablePCI) if err != nil { cleanup(network, paths, nil, 0) return nil, err @@ -277,7 +280,7 @@ func (r *Runtime) RestoreBoot(ctx context.Context, loadSpec SnapshotLoadSpec, us return nil, err } - command, err := launchJailedFirecracker(paths, loadSpec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath) + command, err := launchJailedFirecracker(paths, loadSpec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath, r.enablePCI) if err != nil { cleanup(network, paths, nil, 0) return nil, err