feat: nvme disk on m6

This commit is contained in:
Harivansh Rathi 2026-04-10 04:04:08 +00:00
parent 54a4c423a6
commit eb9d2a76df
9 changed files with 240 additions and 22 deletions

View file

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
@ -21,6 +22,8 @@ type Config struct {
SnapshotsDir string
RuntimeDir string
DiskCloneMode DiskCloneMode
DriveIOEngine firecracker.DriveIOEngine
EnablePCI bool
SocketPath string
HTTPAddr string
EgressInterface string
@ -42,6 +45,10 @@ const (
// Load loads and validates the firecracker-host daemon configuration from the environment.
func Load() (Config, error) {
rootDir := filepath.Clean(strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_ROOT_DIR")))
enablePCI, err := loadBool("FIRECRACKER_HOST_ENABLE_PCI")
if err != nil {
return Config{}, err
}
cfg := Config{
RootDir: rootDir,
StatePath: filepath.Join(rootDir, "state", "state.json"),
@ -51,6 +58,8 @@ func Load() (Config, error) {
SnapshotsDir: filepath.Join(rootDir, "snapshots"),
RuntimeDir: filepath.Join(rootDir, "runtime"),
DiskCloneMode: loadDiskCloneMode(os.Getenv("FIRECRACKER_HOST_DISK_CLONE_MODE")),
DriveIOEngine: loadDriveIOEngine(os.Getenv("FIRECRACKER_HOST_DRIVE_IO_ENGINE")),
EnablePCI: enablePCI,
SocketPath: filepath.Join(rootDir, defaultSocketName),
HTTPAddr: strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_HTTP_ADDR")),
EgressInterface: strings.TrimSpace(os.Getenv("FIRECRACKER_HOST_EGRESS_INTERFACE")),
@ -96,6 +105,9 @@ func (c Config) Validate() error {
if err := c.DiskCloneMode.Validate(); err != nil {
return err
}
if err := validateDriveIOEngine(c.DriveIOEngine); err != nil {
return err
}
if strings.TrimSpace(c.SocketPath) == "" {
return fmt.Errorf("socket path is required")
}
@ -112,6 +124,7 @@ func (c Config) FirecrackerRuntimeConfig() firecracker.RuntimeConfig {
EgressInterface: c.EgressInterface,
FirecrackerBinaryPath: c.FirecrackerBinaryPath,
JailerBinaryPath: c.JailerBinaryPath,
EnablePCI: c.EnablePCI,
}
}
@ -132,3 +145,32 @@ func (m DiskCloneMode) Validate() error {
return fmt.Errorf("FIRECRACKER_HOST_DISK_CLONE_MODE must be %q or %q", DiskCloneModeReflink, DiskCloneModeCopy)
}
}
func loadDriveIOEngine(raw string) firecracker.DriveIOEngine {
value := strings.TrimSpace(raw)
if value == "" {
return firecracker.DriveIOEngineSync
}
return firecracker.DriveIOEngine(value)
}
func validateDriveIOEngine(engine firecracker.DriveIOEngine) error {
switch engine {
case firecracker.DriveIOEngineSync, firecracker.DriveIOEngineAsync:
return nil
default:
return fmt.Errorf("FIRECRACKER_HOST_DRIVE_IO_ENGINE must be %q or %q", firecracker.DriveIOEngineSync, firecracker.DriveIOEngineAsync)
}
}
func loadBool(name string) (bool, error) {
value := strings.TrimSpace(os.Getenv(name))
if value == "" {
return false, nil
}
parsed, err := strconv.ParseBool(value)
if err != nil {
return false, fmt.Errorf("%s must be a boolean: %w", name, err)
}
return parsed, nil
}

View file

@ -1,6 +1,10 @@
package config
import "testing"
import (
"testing"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
)
func TestLoadDiskCloneModeDefaultsToReflink(t *testing.T) {
t.Parallel()
@ -38,3 +42,73 @@ func TestDiskCloneModeValidate(t *testing.T) {
})
}
}
func TestLoadDriveIOEngineDefaultsToSync(t *testing.T) {
t.Parallel()
if got := loadDriveIOEngine(""); got != firecracker.DriveIOEngineSync {
t.Fatalf("drive io engine = %q, want %q", got, firecracker.DriveIOEngineSync)
}
}
func TestValidateDriveIOEngine(t *testing.T) {
t.Parallel()
tests := []struct {
name string
engine firecracker.DriveIOEngine
wantErr bool
}{
{name: "sync", engine: firecracker.DriveIOEngineSync},
{name: "async", engine: firecracker.DriveIOEngineAsync},
{name: "empty", engine: "", wantErr: true},
{name: "unknown", engine: "Aio", wantErr: true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
err := validateDriveIOEngine(tt.engine)
if tt.wantErr && err == nil {
t.Fatal("validateDriveIOEngine() error = nil, want error")
}
if !tt.wantErr && err != nil {
t.Fatalf("validateDriveIOEngine() error = %v, want nil", err)
}
})
}
}
func TestLoadBool(t *testing.T) {
tests := []struct {
name string
value string
want bool
wantErr bool
}{
{name: "default"},
{name: "true", value: "true", want: true},
{name: "false", value: "false"},
{name: "one", value: "1", want: true},
{name: "invalid", value: "enabled", wantErr: true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
envName := "TEST_FIRECRACKER_BOOL"
t.Setenv(envName, tt.value)
got, err := loadBool(envName)
if tt.wantErr && err == nil {
t.Fatal("loadBool() error = nil, want error")
}
if !tt.wantErr && err != nil {
t.Fatalf("loadBool() error = %v, want nil", err)
}
if got != tt.want {
t.Fatalf("loadBool() = %v, want %v", got, tt.want)
}
})
}
}

View file

@ -159,9 +159,11 @@ func (d *Daemon) buildMachineSpec(machineID contracthost.MachineID, artifact *mo
drives := make([]firecracker.DriveSpec, 0, len(userVolumes))
for i, volume := range userVolumes {
drives = append(drives, firecracker.DriveSpec{
ID: fmt.Sprintf("user-%d", i),
Path: volume.Path,
ReadOnly: false,
ID: fmt.Sprintf("user-%d", i),
Path: volume.Path,
ReadOnly: false,
CacheType: firecracker.DriveCacheTypeUnsafe,
IOEngine: d.config.DriveIOEngine,
})
}
@ -179,9 +181,9 @@ func (d *Daemon) buildMachineSpec(machineID contracthost.MachineID, artifact *mo
ID: "root_drive",
Path: systemVolumePath,
CacheType: firecracker.DriveCacheTypeUnsafe,
IOEngine: firecracker.DriveIOEngineSync,
IOEngine: d.config.DriveIOEngine,
},
KernelArgs: defaultGuestKernelArgs,
KernelArgs: guestKernelArgs(d.config.EnablePCI),
Drives: drives,
MMDS: mmds,
Vsock: guestVsockSpec(machineID),

View file

@ -8,21 +8,22 @@ import (
"sync"
"time"
contracthost "github.com/getcompanion-ai/computer-host/contract"
appconfig "github.com/getcompanion-ai/computer-host/internal/config"
"github.com/getcompanion-ai/computer-host/internal/firecracker"
"github.com/getcompanion-ai/computer-host/internal/model"
"github.com/getcompanion-ai/computer-host/internal/store"
contracthost "github.com/getcompanion-ai/computer-host/contract"
)
const (
defaultGuestKernelArgs = "console=ttyS0 reboot=k panic=1 pci=off"
defaultGuestMemoryMiB = int64(3072)
defaultGuestVCPUs = int64(2)
defaultSSHPort = uint16(2222)
defaultVNCPort = uint16(6080)
defaultCopyBufferSize = 1024 * 1024
defaultGuestDialTimeout = 500 * time.Millisecond
defaultGuestKernelArgs = "console=ttyS0 reboot=k panic=1"
defaultGuestKernelArgsNoPCI = defaultGuestKernelArgs + " pci=off"
defaultGuestMemoryMiB = int64(3072)
defaultGuestVCPUs = int64(2)
defaultSSHPort = uint16(2222)
defaultVNCPort = uint16(6080)
defaultCopyBufferSize = 1024 * 1024
defaultGuestDialTimeout = 500 * time.Millisecond
)
type Runtime interface {
@ -73,6 +74,9 @@ func New(cfg appconfig.Config, store store.Store, runtime Runtime) (*Daemon, err
return nil, fmt.Errorf("create daemon dir %q: %w", dir, err)
}
}
if err := validateDiskCloneBackend(cfg); err != nil {
return nil, err
}
daemon := &Daemon{
config: cfg,
store: store,
@ -127,3 +131,10 @@ func (d *Daemon) lockArtifact(key string) func() {
lock.Lock()
return lock.Unlock
}
func guestKernelArgs(enablePCI bool) string {
if enablePCI {
return defaultGuestKernelArgs
}
return defaultGuestKernelArgsNoPCI
}

View file

@ -758,6 +758,7 @@ func testConfig(root string) appconfig.Config {
SnapshotsDir: filepath.Join(root, "snapshots"),
RuntimeDir: filepath.Join(root, "runtime"),
DiskCloneMode: appconfig.DiskCloneModeCopy,
DriveIOEngine: firecracker.DriveIOEngineSync,
SocketPath: filepath.Join(root, "firecracker-host.sock"),
EgressInterface: "eth0",
FirecrackerBinaryPath: "/usr/bin/firecracker",
@ -765,6 +766,22 @@ func testConfig(root string) appconfig.Config {
}
}
func TestGuestKernelArgsDisablesPCIByDefault(t *testing.T) {
t.Parallel()
if got := guestKernelArgs(false); !strings.Contains(got, "pci=off") {
t.Fatalf("guestKernelArgs(false) = %q, want pci=off", got)
}
}
func TestGuestKernelArgsRemovesPCIOffWhenPCIEnabled(t *testing.T) {
t.Parallel()
if got := guestKernelArgs(true); strings.Contains(got, "pci=off") {
t.Fatalf("guestKernelArgs(true) = %q, want no pci=off", got)
}
}
func stubGuestSSHPublicKeyReader(hostDaemon *Daemon) {
hostDaemon.readGuestSSHPublicKey = func(context.Context, string) (string, error) {
return "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIO0j1AyW0mQm9a1G2rY0R4fP2G5+4Qx2V3FJ9P2mA6N3", nil

View file

@ -97,7 +97,10 @@ func cloneFile(source string, target string) error {
func cloneDiskFile(source string, target string, mode appconfig.DiskCloneMode) error {
switch mode {
case appconfig.DiskCloneModeReflink:
return reflinkFile(source, target)
if err := reflinkFile(source, target); err != nil {
return reflinkRequiredError(err)
}
return nil
case appconfig.DiskCloneModeCopy:
return cloneFile(source, target)
default:
@ -105,6 +108,42 @@ func cloneDiskFile(source string, target string, mode appconfig.DiskCloneMode) e
}
}
func validateDiskCloneBackend(cfg appconfig.Config) error {
if cfg.DiskCloneMode != appconfig.DiskCloneModeReflink {
return nil
}
sourceFile, err := os.CreateTemp(cfg.ArtifactsDir, ".reflink-probe-source-*")
if err != nil {
return fmt.Errorf("create reflink probe source in %q: %w", cfg.ArtifactsDir, err)
}
sourcePath := sourceFile.Name()
defer func() {
_ = os.Remove(sourcePath)
}()
if _, err := sourceFile.WriteString("reflink-probe"); err != nil {
_ = sourceFile.Close()
return fmt.Errorf("write reflink probe source %q: %w", sourcePath, err)
}
if err := sourceFile.Close(); err != nil {
return fmt.Errorf("close reflink probe source %q: %w", sourcePath, err)
}
targetPath := filepath.Join(cfg.MachineDisksDir, "."+filepath.Base(sourcePath)+".target")
defer func() {
_ = os.Remove(targetPath)
}()
if err := cloneDiskFile(sourcePath, targetPath, cfg.DiskCloneMode); err != nil {
return fmt.Errorf("validate disk clone backend from artifacts dir %q to machine disks dir %q: %w", cfg.ArtifactsDir, cfg.MachineDisksDir, err)
}
return nil
}
func reflinkRequiredError(err error) error {
return fmt.Errorf("FIRECRACKER_HOST_DISK_CLONE_MODE=reflink requires a CoW filesystem with reflink support across artifacts and machine-disks; mount FIRECRACKER_HOST_ROOT_DIR on XFS with reflink=1 or Btrfs, preferably on local NVMe, or set FIRECRACKER_HOST_DISK_CLONE_MODE=copy for the slow full-copy fallback: %w", err)
}
func reflinkFile(source string, target string) error {
if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
return fmt.Errorf("create target dir for %q: %w", target, err)

View file

@ -66,9 +66,8 @@ func configureMachine(ctx context.Context, client *apiClient, paths machinePaths
return nil
}
func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecrackerBinaryPath string, jailerBinaryPath string) (*exec.Cmd, error) {
command := exec.Command(
jailerBinaryPath,
func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecrackerBinaryPath string, jailerBinaryPath string, enablePCI bool) (*exec.Cmd, error) {
args := []string{
"--id", string(machineID),
"--uid", strconv.Itoa(os.Getuid()),
"--gid", strconv.Itoa(os.Getgid()),
@ -83,7 +82,11 @@ func launchJailedFirecracker(paths machinePaths, machineID MachineID, firecracke
"--level", defaultFirecrackerLogLevel,
"--show-level",
"--show-log-origin",
)
}
if enablePCI {
args = append(args, "--enable-pci")
}
command := exec.Command(jailerBinaryPath, args...)
if err := command.Start(); err != nil {
return nil, fmt.Errorf("start jailer: %w", err)
}

View file

@ -26,7 +26,7 @@ func TestLaunchJailedFirecrackerPassesDaemonAndLoggingFlags(t *testing.T) {
t.Fatalf("create log dir: %v", err)
}
if _, err := launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", jailerPath); err != nil {
if _, err := launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", jailerPath, false); err != nil {
t.Fatalf("launch jailed firecracker: %v", err)
}
@ -45,6 +45,33 @@ func TestLaunchJailedFirecrackerPassesDaemonAndLoggingFlags(t *testing.T) {
}
}
func TestLaunchJailedFirecrackerPassesEnablePCIWhenConfigured(t *testing.T) {
root := t.TempDir()
argsPath := filepath.Join(root, "args.txt")
jailerPath := filepath.Join(root, "fake-jailer.sh")
script := "#!/bin/sh\nprintf '%s\n' \"$@\" > " + shellQuote(argsPath) + "\n"
if err := os.WriteFile(jailerPath, []byte(script), 0o755); err != nil {
t.Fatalf("write fake jailer: %v", err)
}
paths, err := buildMachinePaths(root, "vm-1", "/usr/bin/firecracker")
if err != nil {
t.Fatalf("build machine paths: %v", err)
}
if err := os.MkdirAll(paths.LogDir, 0o755); err != nil {
t.Fatalf("create log dir: %v", err)
}
if _, err := launchJailedFirecracker(paths, "vm-1", "/usr/bin/firecracker", jailerPath, true); err != nil {
t.Fatalf("launch jailed firecracker: %v", err)
}
args := waitForFileContents(t, argsPath)
if !containsLine(args, "--enable-pci") {
t.Fatalf("missing launch argument %q in %v", "--enable-pci", args)
}
}
func TestWaitForPIDFileReadsPID(t *testing.T) {
pidFilePath := filepath.Join(t.TempDir(), "firecracker.pid")
if err := os.WriteFile(pidFilePath, []byte("4321\n"), 0o644); err != nil {

View file

@ -24,12 +24,14 @@ type RuntimeConfig struct {
EgressInterface string
FirecrackerBinaryPath string
JailerBinaryPath string
EnablePCI bool
}
type Runtime struct {
rootDir string
firecrackerBinaryPath string
jailerBinaryPath string
enablePCI bool
networkAllocator *NetworkAllocator
networkProvisioner NetworkProvisioner
}
@ -69,6 +71,7 @@ func NewRuntime(cfg RuntimeConfig) (*Runtime, error) {
rootDir: rootDir,
firecrackerBinaryPath: firecrackerBinaryPath,
jailerBinaryPath: jailerBinaryPath,
enablePCI: cfg.EnablePCI,
networkAllocator: allocator,
networkProvisioner: NewIPTapProvisioner(defaultNetworkCIDR, egressInterface),
}, nil
@ -110,7 +113,7 @@ func (r *Runtime) Boot(ctx context.Context, spec MachineSpec, usedNetworks []Net
return nil, err
}
command, err := launchJailedFirecracker(paths, spec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath)
command, err := launchJailedFirecracker(paths, spec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath, r.enablePCI)
if err != nil {
cleanup(network, paths, nil, 0)
return nil, err
@ -277,7 +280,7 @@ func (r *Runtime) RestoreBoot(ctx context.Context, loadSpec SnapshotLoadSpec, us
return nil, err
}
command, err := launchJailedFirecracker(paths, loadSpec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath)
command, err := launchJailedFirecracker(paths, loadSpec.ID, r.firecrackerBinaryPath, r.jailerBinaryPath, r.enablePCI)
if err != nil {
cleanup(network, paths, nil, 0)
return nil, err