mirror of
https://github.com/getcompanion-ai/computer-host.git
synced 2026-04-18 02:03:03 +00:00
chore: termination cleanup tighten + test
This commit is contained in:
parent
592df1e1df
commit
28ca0219d9
2 changed files with 176 additions and 1 deletions
|
|
@ -14,6 +14,11 @@ import (
|
||||||
|
|
||||||
var ErrMachineNotFound = errors.New("machine not found")
|
var ErrMachineNotFound = errors.New("machine not found")
|
||||||
|
|
||||||
|
var (
|
||||||
|
stopGracePeriod = 5 * time.Second
|
||||||
|
stopPollInterval = 50 * time.Millisecond
|
||||||
|
)
|
||||||
|
|
||||||
type RuntimeConfig struct {
|
type RuntimeConfig struct {
|
||||||
RootDir string
|
RootDir string
|
||||||
FirecrackerBinaryPath string
|
FirecrackerBinaryPath string
|
||||||
|
|
@ -163,13 +168,21 @@ func (r *Runtime) Stop(ctx context.Context, state MachineState) error {
|
||||||
return fmt.Errorf("stop machine %q: %w", state.ID, err)
|
return fmt.Errorf("stop machine %q: %w", state.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ticker := time.NewTicker(50 * time.Millisecond)
|
ticker := time.NewTicker(stopPollInterval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
deadline := time.Now().Add(stopGracePeriod)
|
||||||
|
sentKill := false
|
||||||
|
|
||||||
for {
|
for {
|
||||||
if !processExists(state.PID) {
|
if !processExists(state.PID) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
if !sentKill && time.Now().After(deadline) {
|
||||||
|
if err := process.Signal(syscall.SIGKILL); err != nil && !errors.Is(err, os.ErrProcessDone) {
|
||||||
|
return fmt.Errorf("kill machine %q: %w", state.ID, err)
|
||||||
|
}
|
||||||
|
sentKill = true
|
||||||
|
}
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
|
|
@ -206,6 +219,13 @@ func processExists(pid int) bool {
|
||||||
if pid < 1 {
|
if pid < 1 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if payload, err := os.ReadFile(filepath.Join("/proc", fmt.Sprintf("%d", pid), "stat")); err == nil {
|
||||||
|
if marker := strings.LastIndexByte(string(payload), ')'); marker >= 0 && marker+2 < len(payload) {
|
||||||
|
if payload[marker+2] == 'Z' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
err := syscall.Kill(pid, 0)
|
err := syscall.Kill(pid, 0)
|
||||||
return err == nil || err == syscall.EPERM
|
return err == nil || err == syscall.EPERM
|
||||||
}
|
}
|
||||||
|
|
|
||||||
155
internal/firecracker/runtime_test.go
Normal file
155
internal/firecracker/runtime_test.go
Normal file
|
|
@ -0,0 +1,155 @@
|
||||||
|
package firecracker
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"os/signal"
|
||||||
|
"path/filepath"
|
||||||
|
"syscall"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
const testHelperProcessEnv = "MICROAGENT_TEST_HELPER_PROCESS"
|
||||||
|
const testHelperReadyFileEnv = "MICROAGENT_TEST_HELPER_READY_FILE"
|
||||||
|
|
||||||
|
func TestStopEscalatesToSIGKILL(t *testing.T) {
|
||||||
|
if os.Getenv(testHelperProcessEnv) == "ignore-term" {
|
||||||
|
runIgnoreTERMHelper()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
readyFile := filepath.Join(t.TempDir(), "ready")
|
||||||
|
command := exec.Command(os.Args[0], "-test.run=TestStopEscalatesToSIGKILL")
|
||||||
|
command.Env = append(
|
||||||
|
os.Environ(),
|
||||||
|
testHelperProcessEnv+"=ignore-term",
|
||||||
|
testHelperReadyFileEnv+"="+readyFile,
|
||||||
|
)
|
||||||
|
if err := command.Start(); err != nil {
|
||||||
|
t.Fatalf("start helper process: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
_ = command.Process.Kill()
|
||||||
|
_ = command.Wait()
|
||||||
|
})
|
||||||
|
waitForHelperReady(t, readyFile)
|
||||||
|
|
||||||
|
restore := setStopTimings(20*time.Millisecond, 5*time.Millisecond)
|
||||||
|
defer restore()
|
||||||
|
|
||||||
|
runtime := &Runtime{}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
if err := runtime.Stop(ctx, MachineState{ID: "vm-1", PID: command.Process.Pid}); err != nil {
|
||||||
|
t.Fatalf("stop machine: %v", err)
|
||||||
|
}
|
||||||
|
if processExists(command.Process.Pid) {
|
||||||
|
t.Fatalf("process %d still running after stop", command.Process.Pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteRemovesRuntimeDirAfterForcedKill(t *testing.T) {
|
||||||
|
if os.Getenv(testHelperProcessEnv) == "ignore-term" {
|
||||||
|
runIgnoreTERMHelper()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
root := t.TempDir()
|
||||||
|
paths, err := buildMachinePaths(root, "vm-1", "/usr/bin/firecracker")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("build machine paths: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(paths.BaseDir, 0o755); err != nil {
|
||||||
|
t.Fatalf("create machine base dir: %v", err)
|
||||||
|
}
|
||||||
|
sentinelPath := filepath.Join(paths.BaseDir, "sentinel")
|
||||||
|
if err := os.WriteFile(sentinelPath, []byte("ok"), 0o644); err != nil {
|
||||||
|
t.Fatalf("write sentinel file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
readyFile := filepath.Join(t.TempDir(), "ready")
|
||||||
|
command := exec.Command(os.Args[0], "-test.run=TestDeleteRemovesRuntimeDirAfterForcedKill")
|
||||||
|
command.Env = append(
|
||||||
|
os.Environ(),
|
||||||
|
testHelperProcessEnv+"=ignore-term",
|
||||||
|
testHelperReadyFileEnv+"="+readyFile,
|
||||||
|
)
|
||||||
|
if err := command.Start(); err != nil {
|
||||||
|
t.Fatalf("start helper process: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
_ = command.Process.Kill()
|
||||||
|
_ = command.Wait()
|
||||||
|
})
|
||||||
|
waitForHelperReady(t, readyFile)
|
||||||
|
|
||||||
|
restore := setStopTimings(20*time.Millisecond, 5*time.Millisecond)
|
||||||
|
defer restore()
|
||||||
|
|
||||||
|
runtime := &Runtime{
|
||||||
|
rootDir: root,
|
||||||
|
firecrackerBinaryPath: "/usr/bin/firecracker",
|
||||||
|
networkProvisioner: &IPTapProvisioner{
|
||||||
|
runCommand: func(context.Context, string, ...string) error { return nil },
|
||||||
|
},
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
state := MachineState{
|
||||||
|
ID: "vm-1",
|
||||||
|
PID: command.Process.Pid,
|
||||||
|
}
|
||||||
|
if err := runtime.Delete(ctx, state); err != nil {
|
||||||
|
t.Fatalf("delete machine: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(paths.BaseDir); !os.IsNotExist(err) {
|
||||||
|
t.Fatalf("machine dir still exists after delete: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runIgnoreTERMHelper() {
|
||||||
|
signals := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(signals, syscall.SIGTERM)
|
||||||
|
defer signal.Stop(signals)
|
||||||
|
if readyFile := os.Getenv(testHelperReadyFileEnv); readyFile != "" {
|
||||||
|
_ = os.WriteFile(readyFile, []byte("ready"), 0o644)
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
<-signals
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func setStopTimings(grace time.Duration, poll time.Duration) func() {
|
||||||
|
previousGrace := stopGracePeriod
|
||||||
|
previousPoll := stopPollInterval
|
||||||
|
stopGracePeriod = grace
|
||||||
|
stopPollInterval = poll
|
||||||
|
return func() {
|
||||||
|
stopGracePeriod = previousGrace
|
||||||
|
stopPollInterval = previousPoll
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func waitForHelperReady(t *testing.T, readyFile string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
timeout := time.NewTimer(time.Second)
|
||||||
|
defer timeout.Stop()
|
||||||
|
ticker := time.NewTicker(5 * time.Millisecond)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
if _, err := os.Stat(readyFile); err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-timeout.C:
|
||||||
|
t.Fatalf("timed out waiting for helper ready file %q", readyFile)
|
||||||
|
case <-ticker.C:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue