feat: firecracker mmds identity

This commit is contained in:
Harivansh Rathi 2026-04-10 00:53:47 +00:00
parent 500354cd9b
commit 3eb610b703
23 changed files with 1813 additions and 263 deletions

View file

@ -141,7 +141,7 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
t.Fatalf("create machine: %v", err)
}
if response.Machine.Phase != contracthost.MachinePhaseRunning {
if response.Machine.Phase != contracthost.MachinePhaseStarting {
t.Fatalf("machine phase mismatch: got %q", response.Machine.Phase)
}
if response.Machine.RuntimeHost != "127.0.0.1" {
@ -169,29 +169,25 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
if err != nil {
t.Fatalf("read backend ssh public key: %v", err)
}
authorizedKeys, err := readExt4File(runtime.lastSpec.RootFSPath, "/etc/microagent/authorized_keys")
if err != nil {
t.Fatalf("read injected authorized_keys: %v", err)
if runtime.lastSpec.MMDS == nil {
t.Fatalf("expected MMDS configuration on machine spec")
}
if runtime.lastSpec.MMDS.Version != firecracker.MMDSVersionV2 {
t.Fatalf("mmds version mismatch: got %q", runtime.lastSpec.MMDS.Version)
}
payload, ok := runtime.lastSpec.MMDS.Data.(guestMetadataEnvelope)
if !ok {
t.Fatalf("mmds payload type mismatch: got %T", runtime.lastSpec.MMDS.Data)
}
if payload.Latest.MetaData.Hostname != "vm-1" {
t.Fatalf("mmds hostname mismatch: got %q", payload.Latest.MetaData.Hostname)
}
authorizedKeys := strings.Join(payload.Latest.MetaData.AuthorizedKeys, "\n")
if !strings.Contains(authorizedKeys, strings.TrimSpace(string(hostAuthorizedKeyBytes))) {
t.Fatalf("authorized_keys missing backend ssh key: %q", authorizedKeys)
t.Fatalf("mmds authorized_keys missing backend ssh key: %q", authorizedKeys)
}
if !strings.Contains(authorizedKeys, "daemon-test") {
t.Fatalf("authorized_keys missing request override key: %q", authorizedKeys)
}
machineName, err := readExt4File(runtime.lastSpec.RootFSPath, "/etc/microagent/machine-name")
if err != nil {
t.Fatalf("read injected machine-name: %v", err)
}
if machineName != "vm-1\n" {
t.Fatalf("machine-name mismatch: got %q want %q", machineName, "vm-1\n")
}
hosts, err := readExt4File(runtime.lastSpec.RootFSPath, "/etc/hosts")
if err != nil {
t.Fatalf("read injected hosts: %v", err)
}
if !strings.Contains(hosts, "127.0.1.1 vm-1") {
t.Fatalf("hosts missing machine identity: %q", hosts)
t.Fatalf("mmds authorized_keys missing request override key: %q", authorizedKeys)
}
artifact, err := fileStore.GetArtifact(context.Background(), response.Machine.Artifact)
@ -214,6 +210,12 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
if machine.SystemVolumeID != "vm-1-system" {
t.Fatalf("system volume mismatch: got %q", machine.SystemVolumeID)
}
if machine.Phase != contracthost.MachinePhaseStarting {
t.Fatalf("stored machine phase mismatch: got %q", machine.Phase)
}
if machine.GuestConfig == nil || len(machine.GuestConfig.AuthorizedKeys) == 0 {
t.Fatalf("stored guest config missing authorized keys: %#v", machine.GuestConfig)
}
operations, err := fileStore.ListOperations(context.Background())
if err != nil {
@ -224,6 +226,65 @@ func TestCreateMachineStagesArtifactsAndPersistsState(t *testing.T) {
}
}
func TestStopMachineSyncsGuestFilesystemBeforeDelete(t *testing.T) {
root := t.TempDir()
cfg := testConfig(root)
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
if err != nil {
t.Fatalf("create file store: %v", err)
}
runtime := &fakeRuntime{}
hostDaemon, err := New(cfg, fileStore, runtime)
if err != nil {
t.Fatalf("create daemon: %v", err)
}
var syncedHost string
hostDaemon.syncGuestFilesystem = func(_ context.Context, runtimeHost string) error {
syncedHost = runtimeHost
return nil
}
now := time.Now().UTC()
if err := fileStore.CreateMachine(context.Background(), model.MachineRecord{
ID: "vm-stop",
SystemVolumeID: "vm-stop-system",
RuntimeHost: "172.16.0.2",
TapDevice: "fctap-stop",
Phase: contracthost.MachinePhaseRunning,
PID: 1234,
SocketPath: filepath.Join(root, "runtime", "vm-stop.sock"),
Ports: defaultMachinePorts(),
CreatedAt: now,
StartedAt: &now,
}); err != nil {
t.Fatalf("create machine: %v", err)
}
if err := hostDaemon.StopMachine(context.Background(), "vm-stop"); err != nil {
t.Fatalf("stop machine: %v", err)
}
if syncedHost != "172.16.0.2" {
t.Fatalf("sync host mismatch: got %q want %q", syncedHost, "172.16.0.2")
}
if len(runtime.deleteCalls) != 1 {
t.Fatalf("runtime delete call count mismatch: got %d want 1", len(runtime.deleteCalls))
}
stopped, err := fileStore.GetMachine(context.Background(), "vm-stop")
if err != nil {
t.Fatalf("get stopped machine: %v", err)
}
if stopped.Phase != contracthost.MachinePhaseStopped {
t.Fatalf("machine phase mismatch: got %q want %q", stopped.Phase, contracthost.MachinePhaseStopped)
}
if stopped.RuntimeHost != "" {
t.Fatalf("runtime host should be cleared after stop, got %q", stopped.RuntimeHost)
}
}
func TestNewEnsuresBackendSSHKeyPair(t *testing.T) {
root := t.TempDir()
cfg := testConfig(root)
@ -249,7 +310,7 @@ func TestNewEnsuresBackendSSHKeyPair(t *testing.T) {
}
}
func TestRestoreSnapshotRejectsRunningSourceMachine(t *testing.T) {
func TestRestoreSnapshotFallsBackToLocalSnapshotNetwork(t *testing.T) {
root := t.TempDir()
cfg := testConfig(root)
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
@ -257,7 +318,23 @@ func TestRestoreSnapshotRejectsRunningSourceMachine(t *testing.T) {
t.Fatalf("create file store: %v", err)
}
runtime := &fakeRuntime{}
sshListener := listenTestPort(t, int(defaultSSHPort))
defer func() { _ = sshListener.Close() }()
vncListener := listenTestPort(t, int(defaultVNCPort))
defer func() { _ = vncListener.Close() }()
startedAt := time.Unix(1700000099, 0).UTC()
runtime := &fakeRuntime{
bootState: firecracker.MachineState{
ID: "restored",
Phase: firecracker.PhaseRunning,
PID: 1234,
RuntimeHost: "127.0.0.1",
SocketPath: filepath.Join(cfg.RuntimeDir, "machines", "restored", "root", "run", "firecracker.sock"),
TapName: "fctap0",
StartedAt: &startedAt,
},
}
hostDaemon, err := New(cfg, fileStore, runtime)
if err != nil {
t.Fatalf("create daemon: %v", err)
@ -281,32 +358,13 @@ func TestRestoreSnapshotRejectsRunningSourceMachine(t *testing.T) {
t.Fatalf("put artifact: %v", err)
}
if err := fileStore.CreateMachine(context.Background(), model.MachineRecord{
ID: "source",
Artifact: artifactRef,
SystemVolumeID: "source-system",
RuntimeHost: "172.16.0.2",
TapDevice: "fctap0",
Phase: contracthost.MachinePhaseRunning,
CreatedAt: time.Now().UTC(),
}); err != nil {
t.Fatalf("create source machine: %v", err)
}
snapDisk := filepath.Join(root, "snapshots", "snap1", "system.img")
if err := os.MkdirAll(filepath.Dir(snapDisk), 0o755); err != nil {
t.Fatalf("create snapshot dir: %v", err)
}
if err := os.WriteFile(snapDisk, []byte("disk"), 0o644); err != nil {
t.Fatalf("write snapshot disk: %v", err)
}
if err := fileStore.CreateSnapshot(context.Background(), model.SnapshotRecord{
ID: "snap1",
MachineID: "source",
Artifact: artifactRef,
MemFilePath: filepath.Join(root, "snapshots", "snap1", "memory.bin"),
StateFilePath: filepath.Join(root, "snapshots", "snap1", "vmstate.bin"),
DiskPaths: []string{snapDisk},
DiskPaths: []string{filepath.Join(root, "snapshots", "snap1", "system.img")},
SourceRuntimeHost: "172.16.0.2",
SourceTapDevice: "fctap0",
CreatedAt: time.Now().UTC(),
@ -314,17 +372,49 @@ func TestRestoreSnapshotRejectsRunningSourceMachine(t *testing.T) {
t.Fatalf("create snapshot: %v", err)
}
_, err = hostDaemon.RestoreSnapshot(context.Background(), "snap1", contracthost.RestoreSnapshotRequest{
MachineID: "restored",
server := newRestoreArtifactServer(t, map[string][]byte{
"/kernel": []byte("kernel"),
"/rootfs": []byte("rootfs"),
"/memory": []byte("mem"),
"/vmstate": []byte("state"),
"/system": []byte("disk"),
})
if err == nil {
t.Fatal("expected restore rejection while source is running")
defer server.Close()
response, err := hostDaemon.RestoreSnapshot(context.Background(), "snap1", contracthost.RestoreSnapshotRequest{
MachineID: "restored",
Artifact: contracthost.ArtifactRef{
KernelImageURL: server.URL + "/kernel",
RootFSURL: server.URL + "/rootfs",
},
Snapshot: contracthost.DurableSnapshotSpec{
SnapshotID: "snap1",
MachineID: "source",
ImageID: "image-1",
Artifacts: []contracthost.SnapshotArtifact{
{ID: "memory", Kind: contracthost.SnapshotArtifactKindMemory, Name: "memory.bin", DownloadURL: server.URL + "/memory"},
{ID: "vmstate", Kind: contracthost.SnapshotArtifactKindVMState, Name: "vmstate.bin", DownloadURL: server.URL + "/vmstate"},
{ID: "disk-system", Kind: contracthost.SnapshotArtifactKindDisk, Name: "system.img", DownloadURL: server.URL + "/system"},
},
},
})
if err != nil {
t.Fatalf("restore snapshot: %v", err)
}
if !strings.Contains(err.Error(), `source machine "source" is running`) {
t.Fatalf("unexpected restore error: %v", err)
if response.Machine.ID != "restored" {
t.Fatalf("restored machine id mismatch: got %q", response.Machine.ID)
}
if runtime.restoreCalls != 0 {
t.Fatalf("restore boot should not run when source machine is still running: got %d", runtime.restoreCalls)
if runtime.restoreCalls != 1 {
t.Fatalf("restore boot call count mismatch: got %d want 1", runtime.restoreCalls)
}
if runtime.lastLoadSpec.Network == nil {
t.Fatalf("restore boot should preserve snapshot network")
}
if got := runtime.lastLoadSpec.Network.GuestIP().String(); got != "172.16.0.2" {
t.Fatalf("restore guest ip mismatch: got %q want %q", got, "172.16.0.2")
}
if got := runtime.lastLoadSpec.Network.TapName; got != "fctap0" {
t.Fatalf("restore tap mismatch: got %q want %q", got, "fctap0")
}
ops, err := fileStore.ListOperations(context.Background())
@ -332,11 +422,11 @@ func TestRestoreSnapshotRejectsRunningSourceMachine(t *testing.T) {
t.Fatalf("list operations: %v", err)
}
if len(ops) != 0 {
t.Fatalf("operation journal should be empty after handled restore rejection: got %d entries", len(ops))
t.Fatalf("operation journal should be empty after successful restore: got %d entries", len(ops))
}
}
func TestRestoreSnapshotUsesSnapshotMetadataWithoutSourceMachine(t *testing.T) {
func TestRestoreSnapshotUsesDurableSnapshotSpec(t *testing.T) {
root := t.TempDir()
cfg := testConfig(root)
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
@ -378,56 +468,35 @@ func TestRestoreSnapshotUsesSnapshotMetadataWithoutSourceMachine(t *testing.T) {
return nil
}
artifactRef := contracthost.ArtifactRef{KernelImageURL: "kernel", RootFSURL: "rootfs"}
kernelPath := filepath.Join(root, "artifact-kernel")
rootFSPath := filepath.Join(root, "artifact-rootfs")
if err := os.WriteFile(kernelPath, []byte("kernel"), 0o644); err != nil {
t.Fatalf("write kernel: %v", err)
}
if err := os.WriteFile(rootFSPath, []byte("rootfs"), 0o644); err != nil {
t.Fatalf("write rootfs: %v", err)
}
if err := fileStore.PutArtifact(context.Background(), model.ArtifactRecord{
Ref: artifactRef,
LocalKey: "artifact",
LocalDir: filepath.Join(root, "artifact"),
KernelImagePath: kernelPath,
RootFSPath: rootFSPath,
CreatedAt: time.Now().UTC(),
}); err != nil {
t.Fatalf("put artifact: %v", err)
}
snapDir := filepath.Join(root, "snapshots", "snap1")
if err := os.MkdirAll(snapDir, 0o755); err != nil {
t.Fatalf("create snapshot dir: %v", err)
}
snapDisk := filepath.Join(snapDir, "system.img")
if err := os.WriteFile(snapDisk, []byte("disk"), 0o644); err != nil {
t.Fatalf("write snapshot disk: %v", err)
}
if err := os.WriteFile(filepath.Join(snapDir, "memory.bin"), []byte("mem"), 0o644); err != nil {
t.Fatalf("write memory snapshot: %v", err)
}
if err := os.WriteFile(filepath.Join(snapDir, "vmstate.bin"), []byte("state"), 0o644); err != nil {
t.Fatalf("write vmstate snapshot: %v", err)
}
if err := fileStore.CreateSnapshot(context.Background(), model.SnapshotRecord{
ID: "snap1",
MachineID: "source",
Artifact: artifactRef,
MemFilePath: filepath.Join(snapDir, "memory.bin"),
StateFilePath: filepath.Join(snapDir, "vmstate.bin"),
DiskPaths: []string{snapDisk},
SourceRuntimeHost: "172.16.0.2",
SourceTapDevice: "fctap0",
CreatedAt: time.Now().UTC(),
}); err != nil {
t.Fatalf("create snapshot: %v", err)
}
server := newRestoreArtifactServer(t, map[string][]byte{
"/kernel": []byte("kernel"),
"/rootfs": []byte("rootfs"),
"/memory": []byte("mem"),
"/vmstate": []byte("state"),
"/system": []byte("disk"),
"/user-0": []byte("user-disk"),
})
defer server.Close()
response, err := hostDaemon.RestoreSnapshot(context.Background(), "snap1", contracthost.RestoreSnapshotRequest{
MachineID: "restored",
Artifact: contracthost.ArtifactRef{
KernelImageURL: server.URL + "/kernel",
RootFSURL: server.URL + "/rootfs",
},
Snapshot: contracthost.DurableSnapshotSpec{
SnapshotID: "snap1",
MachineID: "source",
ImageID: "image-1",
SourceRuntimeHost: "172.16.0.2",
SourceTapDevice: "fctap0",
Artifacts: []contracthost.SnapshotArtifact{
{ID: "memory", Kind: contracthost.SnapshotArtifactKindMemory, Name: "memory.bin", DownloadURL: server.URL + "/memory"},
{ID: "vmstate", Kind: contracthost.SnapshotArtifactKindVMState, Name: "vmstate.bin", DownloadURL: server.URL + "/vmstate"},
{ID: "disk-system", Kind: contracthost.SnapshotArtifactKindDisk, Name: "system.img", DownloadURL: server.URL + "/system"},
{ID: "disk-user-0", Kind: contracthost.SnapshotArtifactKindDisk, Name: "user-0.img", DownloadURL: server.URL + "/user-0"},
},
},
})
if err != nil {
t.Fatalf("restore snapshot: %v", err)
@ -439,13 +508,19 @@ func TestRestoreSnapshotUsesSnapshotMetadataWithoutSourceMachine(t *testing.T) {
t.Fatalf("restore boot call count mismatch: got %d want 1", runtime.restoreCalls)
}
if runtime.lastLoadSpec.Network == nil {
t.Fatal("restore boot did not receive snapshot network")
t.Fatalf("restore boot should preserve durable snapshot network")
}
if got := runtime.lastLoadSpec.Network.GuestIP().String(); got != "172.16.0.2" {
t.Fatalf("restored guest network mismatch: got %q want %q", got, "172.16.0.2")
t.Fatalf("restore guest ip mismatch: got %q want %q", got, "172.16.0.2")
}
if runtime.lastLoadSpec.KernelImagePath != kernelPath {
t.Fatalf("restore boot kernel path mismatch: got %q want %q", runtime.lastLoadSpec.KernelImagePath, kernelPath)
if got := runtime.lastLoadSpec.Network.TapName; got != "fctap0" {
t.Fatalf("restore tap mismatch: got %q want %q", got, "fctap0")
}
if !strings.Contains(runtime.lastLoadSpec.KernelImagePath, filepath.Join("artifacts", artifactKey(contracthost.ArtifactRef{
KernelImageURL: server.URL + "/kernel",
RootFSURL: server.URL + "/rootfs",
}), "kernel")) {
t.Fatalf("restore boot kernel path mismatch: got %q", runtime.lastLoadSpec.KernelImagePath)
}
if reconfiguredHost != "127.0.0.1" || reconfiguredMachine != "restored" {
t.Fatalf("guest identity reconfigure mismatch: host=%q machine=%q", reconfiguredHost, reconfiguredMachine)
@ -458,6 +533,12 @@ func TestRestoreSnapshotUsesSnapshotMetadataWithoutSourceMachine(t *testing.T) {
if machine.Phase != contracthost.MachinePhaseRunning {
t.Fatalf("restored machine phase mismatch: got %q", machine.Phase)
}
if len(machine.UserVolumeIDs) != 1 {
t.Fatalf("restored machine user volumes mismatch: got %#v", machine.UserVolumeIDs)
}
if _, err := os.Stat(filepath.Join(cfg.MachineDisksDir, "restored", "user-0.img")); err != nil {
t.Fatalf("restored user disk missing: %v", err)
}
ops, err := fileStore.ListOperations(context.Background())
if err != nil {
@ -468,6 +549,67 @@ func TestRestoreSnapshotUsesSnapshotMetadataWithoutSourceMachine(t *testing.T) {
}
}
func TestRestoreSnapshotRejectsWhenRestoreNetworkInUseOnHost(t *testing.T) {
root := t.TempDir()
cfg := testConfig(root)
fileStore, err := store.NewFileStore(cfg.StatePath, cfg.OperationsPath)
if err != nil {
t.Fatalf("create file store: %v", err)
}
runtime := &fakeRuntime{}
hostDaemon, err := New(cfg, fileStore, runtime)
if err != nil {
t.Fatalf("create daemon: %v", err)
}
if err := fileStore.CreateMachine(context.Background(), model.MachineRecord{
ID: "source",
Artifact: contracthost.ArtifactRef{KernelImageURL: "https://example.com/kernel", RootFSURL: "https://example.com/rootfs"},
SystemVolumeID: "source-system",
RuntimeHost: "172.16.0.2",
TapDevice: "fctap0",
Phase: contracthost.MachinePhaseRunning,
CreatedAt: time.Now().UTC(),
}); err != nil {
t.Fatalf("create running source machine: %v", err)
}
_, err = hostDaemon.RestoreSnapshot(context.Background(), "snap1", contracthost.RestoreSnapshotRequest{
MachineID: "restored",
Artifact: contracthost.ArtifactRef{
KernelImageURL: "https://example.com/kernel",
RootFSURL: "https://example.com/rootfs",
},
Snapshot: contracthost.DurableSnapshotSpec{
SnapshotID: "snap1",
MachineID: "source",
ImageID: "image-1",
SourceRuntimeHost: "172.16.0.2",
SourceTapDevice: "fctap0",
},
})
if err == nil || !strings.Contains(err.Error(), "still in use on this host") {
t.Fatalf("restore snapshot error = %v, want restore network in-use failure", err)
}
if runtime.restoreCalls != 0 {
t.Fatalf("restore boot should not be attempted, got %d calls", runtime.restoreCalls)
}
}
func newRestoreArtifactServer(t *testing.T, payloads map[string][]byte) *httptest.Server {
t.Helper()
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
payload, ok := payloads[r.URL.Path]
if !ok {
http.NotFound(w, r)
return
}
_, _ = w.Write(payload)
}))
}
func TestCreateMachineRejectsNonHTTPArtifactURLs(t *testing.T) {
t.Parallel()