This commit is contained in:
Harivansh Rathi 2026-02-07 13:49:11 -05:00
commit 0595d93c49
28 changed files with 1763 additions and 0 deletions

117
internal/commands/create.go Normal file
View file

@ -0,0 +1,117 @@
package commands
import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/rathi/agentikube/internal/kube"
	"github.com/spf13/cobra"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
)
// NewCreateCmd returns the "create" command. It provisions a Secret
// holding the LLM provider credentials and a SandboxClaim for the given
// handle, then waits up to 3 minutes for the sandbox to become ready.
//
// The --provider and --api-key flags fall back to the SANDBOX_LLM_PROVIDER
// and SANDBOX_API_KEY environment variables, as advertised in the flag help.
func NewCreateCmd() *cobra.Command {
	var provider string
	var apiKey string
	cmd := &cobra.Command{
		Use:   "create <handle>",
		Short: "Create a new sandbox for an agent",
		Long:  "Creates a Secret and SandboxClaim for the given handle, then waits for it to be ready.",
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			handle := args[0]
			// The flag help promises env-var fallbacks; honor them here so
			// the secret is not silently created with empty credentials.
			if provider == "" {
				provider = os.Getenv("SANDBOX_LLM_PROVIDER")
			}
			if apiKey == "" {
				apiKey = os.Getenv("SANDBOX_API_KEY")
			}
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			ns := cfg.Namespace
			name := "sandbox-" + handle
			// Create the secret with provider credentials.
			secret := &unstructured.Unstructured{
				Object: map[string]interface{}{
					"apiVersion": "v1",
					"kind":       "Secret",
					"metadata": map[string]interface{}{
						"name":      name,
						"namespace": ns,
					},
					"stringData": map[string]interface{}{
						"PROVIDER":     provider,
						"PROVIDER_KEY": apiKey,
						"USER_NAME":    handle,
					},
				},
			}
			secretGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}
			_, err = client.Dynamic().Resource(secretGVR).Namespace(ns).Create(ctx, secret, metav1.CreateOptions{})
			if err != nil {
				return fmt.Errorf("creating secret %q: %w", name, err)
			}
			fmt.Printf("[ok] secret %q created\n", name)
			// Create the SandboxClaim referencing the shared template and
			// the per-sandbox secret created above.
			claim := &unstructured.Unstructured{
				Object: map[string]interface{}{
					"apiVersion": "agentsandbox.dev/v1",
					"kind":       "SandboxClaim",
					"metadata": map[string]interface{}{
						"name":      name,
						"namespace": ns,
					},
					"spec": map[string]interface{}{
						"templateRef": map[string]interface{}{
							"name": "sandbox-template",
						},
						"secretRef": map[string]interface{}{
							"name": name,
						},
					},
				},
			}
			claimGVR := schema.GroupVersionResource{
				Group:    "agentsandbox.dev",
				Version:  "v1",
				Resource: "sandboxclaims",
			}
			_, err = client.Dynamic().Resource(claimGVR).Namespace(ns).Create(ctx, claim, metav1.CreateOptions{})
			if err != nil {
				return fmt.Errorf("creating SandboxClaim %q: %w", name, err)
			}
			fmt.Printf("[ok] SandboxClaim %q created\n", name)
			// Wait for the sandbox to become ready (3 min timeout).
			fmt.Println("waiting for sandbox to be ready...")
			waitCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
			defer cancel()
			if err := client.WaitForReady(waitCtx, ns, "sandboxclaims", name); err != nil {
				return fmt.Errorf("waiting for sandbox: %w", err)
			}
			fmt.Printf("\nsandbox %q is ready\n", handle)
			fmt.Printf(" name: %s\n", name)
			fmt.Printf(" namespace: %s\n", ns)
			fmt.Printf(" ssh: agentikube ssh %s\n", handle)
			return nil
		},
	}
	cmd.Flags().StringVar(&provider, "provider", "", "LLM provider name (env: SANDBOX_LLM_PROVIDER)")
	cmd.Flags().StringVar(&apiKey, "api-key", "", "LLM provider API key (env: SANDBOX_API_KEY)")
	return cmd
}

View file

@ -0,0 +1,94 @@
package commands
import (
"bufio"
"context"
"fmt"
"os"
"strings"
"github.com/rathi/agentikube/internal/kube"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/api/errors"
)
// NewDestroyCmd returns the "destroy" command, which removes the
// SandboxClaim, Secret, and PVC for a handle after an interactive
// confirmation (skippable with --yes).
func NewDestroyCmd() *cobra.Command {
	var yes bool
	cmd := &cobra.Command{
		Use:   "destroy <handle>",
		Short: "Destroy a sandbox and its resources",
		Long:  "Deletes the SandboxClaim, Secret, and PVC for the given handle.",
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			handle := args[0]
			if !yes {
				fmt.Printf("are you sure you want to destroy sandbox %q? [y/N] ", handle)
				scanner := bufio.NewScanner(os.Stdin)
				scanner.Scan()
				answer := strings.TrimSpace(strings.ToLower(scanner.Text()))
				if answer != "y" && answer != "yes" {
					fmt.Println("aborted")
					return nil
				}
			}
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			ns := cfg.Namespace
			name := "sandbox-" + handle
			claimGVR := schema.GroupVersionResource{
				Group:    "agentsandbox.dev",
				Version:  "v1",
				Resource: "sandboxclaims",
			}
			secretGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}
			pvcGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "persistentvolumeclaims"}
			// Delete SandboxClaim. Tolerate NotFound so a partially-created
			// sandbox (e.g. secret created but claim creation failed) can
			// still be cleaned up instead of wedging destroy forever.
			err = client.Dynamic().Resource(claimGVR).Namespace(ns).Delete(ctx, name, metav1.DeleteOptions{})
			switch {
			case err == nil:
				fmt.Printf("[ok] SandboxClaim %q deleted\n", name)
			case errors.IsNotFound(err):
				fmt.Printf("[warn] SandboxClaim %q not found, skipping\n", name)
			default:
				return fmt.Errorf("deleting SandboxClaim %q: %w", name, err)
			}
			// Delete Secret, also tolerating NotFound.
			err = client.Dynamic().Resource(secretGVR).Namespace(ns).Delete(ctx, name, metav1.DeleteOptions{})
			switch {
			case err == nil:
				fmt.Printf("[ok] Secret %q deleted\n", name)
			case errors.IsNotFound(err):
				fmt.Printf("[warn] Secret %q not found, skipping\n", name)
			default:
				return fmt.Errorf("deleting Secret %q: %w", name, err)
			}
			// Delete PVC (best-effort: the claim controller may own it).
			err = client.Dynamic().Resource(pvcGVR).Namespace(ns).Delete(ctx, name, metav1.DeleteOptions{})
			if err != nil {
				if !errors.IsNotFound(err) {
					fmt.Printf("[warn] could not delete PVC %q: %v\n", name, err)
				}
			} else {
				fmt.Printf("[ok] PVC %q deleted\n", name)
			}
			fmt.Printf("\nsandbox %q destroyed\n", handle)
			return nil
		},
	}
	cmd.Flags().BoolVar(&yes, "yes", false, "skip confirmation prompt")
	return cmd
}

65
internal/commands/down.go Normal file
View file

@ -0,0 +1,65 @@
package commands
import (
"context"
"fmt"
"github.com/rathi/agentikube/internal/kube"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// NewDownCmd returns the "down" command, which tears down the shared
// sandbox infrastructure (warm pool and template) while leaving any
// user-created sandboxes untouched.
func NewDownCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "down",
		Short: "Remove sandbox infrastructure (preserves user sandboxes)",
		Long:  "Deletes the SandboxWarmPool and SandboxTemplate. User sandboxes are preserved.",
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			ns := cfg.Namespace
			// Both resources live in the agentsandbox.dev/v1 group; drive
			// the two deletions from a small table.
			targets := []struct {
				kind     string
				resource string
				name     string
			}{
				{"SandboxWarmPool", "sandboxwarmpools", "sandbox-warm-pool"},
				{"SandboxTemplate", "sandboxtemplates", "sandbox-template"},
			}
			for _, target := range targets {
				gvr := schema.GroupVersionResource{
					Group:    "agentsandbox.dev",
					Version:  "v1",
					Resource: target.resource,
				}
				delErr := client.Dynamic().Resource(gvr).Namespace(ns).Delete(ctx, target.name, metav1.DeleteOptions{})
				if delErr != nil {
					fmt.Printf("[warn] could not delete %s: %v\n", target.kind, delErr)
				} else {
					fmt.Printf("[ok] %s deleted\n", target.kind)
				}
			}
			fmt.Println("\nwarm pool and template deleted. User sandboxes are preserved.")
			return nil
		},
	}
	return cmd
}

View file

@ -0,0 +1,11 @@
package commands
import (
"github.com/rathi/agentikube/internal/config"
"github.com/spf13/cobra"
)
// loadConfig resolves the --config flag from the command and parses the
// referenced agentikube configuration file.
func loadConfig(cmd *cobra.Command) (*config.Config, error) {
	path, _ := cmd.Flags().GetString("config")
	return config.Load(path)
}

97
internal/commands/init.go Normal file
View file

@ -0,0 +1,97 @@
package commands
import (
"context"
"fmt"
"os/exec"
"strings"
"github.com/rathi/agentikube/internal/kube"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// crdInstallURL is the upstream agent-sandbox install manifest applied by init.
const crdInstallURL = "https://raw.githubusercontent.com/agent-sandbox/agent-sandbox/main/deploy/install.yaml"

// NewInitCmd returns the "init" command: it verifies cluster connectivity,
// applies the agent-sandbox CRDs, warns about missing optional components
// (EFS CSI driver, Karpenter), and ensures the target namespace exists.
func NewInitCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "init",
		Short: "Initialize the cluster for agent sandboxes",
		Long:  "Checks prerequisites, installs CRDs, and creates the target namespace.",
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			// Connecting successfully doubles as the kubectl-context check.
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			fmt.Println("[ok] connected to Kubernetes cluster")
			// Apply agent-sandbox CRDs via kubectl (server-side ordering is
			// delegated to the upstream install manifest).
			fmt.Println("applying agent-sandbox CRDs...")
			output, err := exec.CommandContext(ctx, "kubectl", "apply", "-f", crdInstallURL).CombinedOutput()
			if err != nil {
				return fmt.Errorf("applying CRDs: %s: %w", strings.TrimSpace(string(output)), err)
			}
			fmt.Println("[ok] agent-sandbox CRDs applied")
			// EFS CSI driver check: any kube-system daemonset whose name
			// mentions "efs-csi".
			daemonSets, err := client.Clientset().AppsV1().DaemonSets("kube-system").List(ctx, metav1.ListOptions{})
			if err != nil {
				return fmt.Errorf("listing daemonsets in kube-system: %w", err)
			}
			hasEFS := false
			for i := range daemonSets.Items {
				if strings.Contains(daemonSets.Items[i].Name, "efs-csi") {
					hasEFS = true
					break
				}
			}
			if hasEFS {
				fmt.Println("[ok] EFS CSI driver found")
			} else {
				fmt.Println("[warn] EFS CSI driver not found - install it before using EFS storage")
			}
			// Karpenter check: a deployment named *karpenter* in either of
			// its usual namespaces. List errors are ignored (namespace may
			// simply not exist).
			hasKarpenter := false
		search:
			for _, candidate := range []string{"karpenter", "kube-system"} {
				deployments, listErr := client.Clientset().AppsV1().Deployments(candidate).List(ctx, metav1.ListOptions{})
				if listErr != nil {
					continue
				}
				for i := range deployments.Items {
					if strings.Contains(deployments.Items[i].Name, "karpenter") {
						hasKarpenter = true
						break search
					}
				}
			}
			if hasKarpenter {
				fmt.Println("[ok] Karpenter found")
			} else {
				fmt.Println("[warn] Karpenter not found - required if compute.type is karpenter")
			}
			// Create namespace if it does not exist.
			if err := client.EnsureNamespace(ctx, cfg.Namespace); err != nil {
				return fmt.Errorf("creating namespace %q: %w", cfg.Namespace, err)
			}
			fmt.Printf("[ok] namespace %q ready\n", cfg.Namespace)
			fmt.Println("\ninit complete")
			return nil
		},
	}
	return cmd
}

138
internal/commands/list.go Normal file
View file

@ -0,0 +1,138 @@
package commands
import (
"context"
"fmt"
"os"
"text/tabwriter"
"time"
"github.com/rathi/agentikube/internal/kube"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// NewListCmd returns the "list" command, which prints a HANDLE/STATUS/AGE/POD
// table of every SandboxClaim in the configured namespace.
func NewListCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "list",
		Short: "List all sandboxes",
		Long:  "Lists all SandboxClaims in the configured namespace.",
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			gvr := schema.GroupVersionResource{
				Group:    "agentsandbox.dev",
				Version:  "v1",
				Resource: "sandboxclaims",
			}
			claims, err := client.Dynamic().Resource(gvr).Namespace(cfg.Namespace).List(ctx, metav1.ListOptions{})
			if err != nil {
				return fmt.Errorf("listing SandboxClaims: %w", err)
			}
			table := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0)
			fmt.Fprintln(table, "HANDLE\tSTATUS\tAGE\tPOD")
			const prefix = "sandbox-"
			for _, claim := range claims.Items {
				fullName := claim.GetName()
				// The handle is the claim name with the "sandbox-" prefix removed.
				handle := fullName
				if len(fullName) > len(prefix) && fullName[:len(prefix)] == prefix {
					handle = fullName[len(prefix):]
				}
				fmt.Fprintf(table, "%s\t%s\t%s\t%s\n",
					handle,
					extractStatus(claim.Object),
					formatAge(claim.GetCreationTimestamp().Time),
					extractPodName(claim.Object),
				)
			}
			table.Flush()
			return nil
		},
	}
	return cmd
}
// extractStatus derives a display status for a SandboxClaim object: the
// state of its Ready condition when present, otherwise "Pending" (no
// conditions yet) or "Unknown" (no status at all).
func extractStatus(obj map[string]interface{}) string {
	statusMap, ok := obj["status"].(map[string]interface{})
	if !ok {
		return "Unknown"
	}
	conds, ok := statusMap["conditions"].([]interface{})
	if !ok || len(conds) == 0 {
		return "Pending"
	}
	for _, raw := range conds {
		cond, ok := raw.(map[string]interface{})
		if !ok {
			continue
		}
		if condType, _ := cond["type"].(string); condType != "Ready" {
			continue
		}
		// First Ready condition wins.
		if condStatus, _ := cond["status"].(string); condStatus == "True" {
			return "Ready"
		}
		if reason, _ := cond["reason"].(string); reason != "" {
			return reason
		}
		return "NotReady"
	}
	return "Pending"
}
// extractPodName pulls the backing pod name from a SandboxClaim object,
// preferring status.podName and falling back to the
// "agentsandbox.dev/pod-name" annotation; "-" when neither is set.
func extractPodName(obj map[string]interface{}) string {
	if statusMap, ok := obj["status"].(map[string]interface{}); ok {
		if pod, ok := statusMap["podName"].(string); ok && pod != "" {
			return pod
		}
	}
	meta, ok := obj["metadata"].(map[string]interface{})
	if !ok {
		return "-"
	}
	annotations, ok := meta["annotations"].(map[string]interface{})
	if !ok {
		return "-"
	}
	if pod, ok := annotations["agentsandbox.dev/pod-name"].(string); ok {
		return pod
	}
	return "-"
}
func formatAge(created time.Time) string {
d := time.Since(created)
switch {
case d < time.Minute:
return fmt.Sprintf("%ds", int(d.Seconds()))
case d < time.Hour:
return fmt.Sprintf("%dm", int(d.Minutes()))
case d < 24*time.Hour:
return fmt.Sprintf("%dh", int(d.Hours()))
default:
return fmt.Sprintf("%dd", int(d.Hours()/24))
}
}

58
internal/commands/ssh.go Normal file
View file

@ -0,0 +1,58 @@
package commands
import (
"context"
"fmt"
"github.com/rathi/agentikube/internal/kube"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// NewSSHCmd returns the "ssh" command, which looks up the pod backing a
// sandbox and opens an interactive /bin/sh inside it via kubectl exec.
func NewSSHCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "ssh <handle>",
		Short: "Open a shell into a sandbox",
		Long:  "Exec into the sandbox pod for the given handle.",
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			handle := args[0]
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			claimName := "sandbox-" + handle
			gvr := schema.GroupVersionResource{
				Group:    "agentsandbox.dev",
				Version:  "v1",
				Resource: "sandboxclaims",
			}
			claim, err := client.Dynamic().Resource(gvr).Namespace(cfg.Namespace).Get(ctx, claimName, metav1.GetOptions{})
			if err != nil {
				return fmt.Errorf("getting SandboxClaim %q: %w", claimName, err)
			}
			pod := extractPodName(claim.Object)
			// extractPodName reports "-" (or "") when no pod is recorded yet.
			if pod == "" || pod == "-" {
				return fmt.Errorf("sandbox %q does not have a pod assigned yet", handle)
			}
			fmt.Printf("connecting to pod %s...\n", pod)
			return kube.Exec(cfg.Namespace, pod, []string{"/bin/sh"})
		},
	}
	return cmd
}

108
internal/commands/status.go Normal file
View file

@ -0,0 +1,108 @@
package commands
import (
"context"
"fmt"
"github.com/rathi/agentikube/internal/kube"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// NewStatusCmd returns the "status" command, which reports warm pool
// replica counts, the number of SandboxClaims, and (for Karpenter
// compute) the count of Karpenter-managed nodes. All lookups are
// best-effort: failures print a message instead of aborting.
func NewStatusCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "status",
		Short: "Show cluster and sandbox status",
		Long:  "Displays warm pool status, sandbox counts, and compute node information.",
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			ns := cfg.Namespace
			// Warm pool: desired vs ready vs pending replica counts.
			poolGVR := schema.GroupVersionResource{
				Group:    "agentsandbox.dev",
				Version:  "v1",
				Resource: "sandboxwarmpools",
			}
			pool, err := client.Dynamic().Resource(poolGVR).Namespace(ns).Get(ctx, "sandbox-warm-pool", metav1.GetOptions{})
			if err != nil {
				fmt.Printf("warm pool: not found (%v)\n", err)
			} else {
				poolSpec, _ := pool.Object["spec"].(map[string]interface{})
				poolStatus, _ := pool.Object["status"].(map[string]interface{})
				fmt.Println("warm pool:")
				fmt.Printf(" desired: %d\n", getInt64(poolSpec, "replicas"))
				fmt.Printf(" ready: %d\n", getInt64(poolStatus, "readyReplicas"))
				fmt.Printf(" pending: %d\n", getInt64(poolStatus, "pendingReplicas"))
			}
			// Total sandbox count.
			claimGVR := schema.GroupVersionResource{
				Group:    "agentsandbox.dev",
				Version:  "v1",
				Resource: "sandboxclaims",
			}
			claims, err := client.Dynamic().Resource(claimGVR).Namespace(ns).List(ctx, metav1.ListOptions{})
			if err != nil {
				fmt.Printf("\nsandboxes: error listing (%v)\n", err)
			} else {
				fmt.Printf("\nsandboxes: %d\n", len(claims.Items))
			}
			// Karpenter nodes, only relevant for karpenter compute.
			if cfg.Compute.Type == "karpenter" {
				nodes, err := client.Clientset().CoreV1().Nodes().List(ctx, metav1.ListOptions{
					LabelSelector: "karpenter.sh/nodepool",
				})
				if err != nil {
					fmt.Printf("\nkarpenter nodes: error listing (%v)\n", err)
				} else {
					fmt.Printf("\nkarpenter nodes: %d\n", len(nodes.Items))
				}
			}
			return nil
		},
	}
	return cmd
}
// getInt64 reads m[key] as an int64, accepting the numeric representations
// produced by JSON/YAML decoding (int64, float64, int). It returns 0 for a
// nil map, a missing key, or a non-numeric value.
func getInt64(m map[string]interface{}, key string) int64 {
	if m == nil {
		return 0
	}
	switch n := m[key].(type) {
	case int64:
		return n
	case float64:
		return int64(n)
	case int:
		return int64(n)
	}
	return 0
}

63
internal/commands/up.go Normal file
View file

@ -0,0 +1,63 @@
package commands
import (
"context"
"fmt"
"github.com/rathi/agentikube/internal/kube"
"github.com/rathi/agentikube/internal/manifest"
"github.com/spf13/cobra"
)
// NewUpCmd returns the "up" command: it renders all manifests from the
// config and server-side-applies them, then (if enabled) waits for the
// warm pool to report ready. With --dry-run the rendered YAML is printed
// instead of applied.
func NewUpCmd() *cobra.Command {
	var dryRun bool
	cmd := &cobra.Command{
		Use:   "up",
		Short: "Apply sandbox infrastructure to the cluster",
		Long:  "Generates and applies all sandbox manifests (templates, warm pool, storage, compute).",
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := context.Background()
			cfg, err := loadConfig(cmd)
			if err != nil {
				return err
			}
			rendered, err := manifest.Generate(cfg)
			if err != nil {
				return fmt.Errorf("generating manifests: %w", err)
			}
			if dryRun {
				// Print and stop before touching the cluster.
				fmt.Print(string(rendered))
				return nil
			}
			client, err := kube.NewClient()
			if err != nil {
				return fmt.Errorf("connecting to cluster: %w", err)
			}
			if err := client.ServerSideApply(ctx, rendered); err != nil {
				return fmt.Errorf("applying manifests: %w", err)
			}
			fmt.Println("[ok] manifests applied")
			if cfg.Sandbox.WarmPool.Enabled {
				fmt.Println("waiting for warm pool to become ready...")
				if err := client.WaitForReady(ctx, cfg.Namespace, "sandboxwarmpools", "sandbox-warm-pool"); err != nil {
					return fmt.Errorf("waiting for warm pool: %w", err)
				}
				fmt.Println("[ok] warm pool ready")
			}
			fmt.Println("\ninfrastructure is up")
			return nil
		},
	}
	cmd.Flags().BoolVar(&dryRun, "dry-run", false, "print manifests to stdout without applying")
	return cmd
}

102
internal/config/config.go Normal file
View file

@ -0,0 +1,102 @@
package config
import (
"fmt"
"os"
"gopkg.in/yaml.v3"
)
// Config is the top-level configuration parsed from agentikube.yaml.
type Config struct {
	Namespace string        `yaml:"namespace"`
	Compute   ComputeConfig `yaml:"compute"`
	Storage   StorageConfig `yaml:"storage"`
	Sandbox   SandboxConfig `yaml:"sandbox"`
}

// ComputeConfig selects and parameterizes the compute backend.
// Karpenter-specific fields are required only when Type is "karpenter";
// FargateSelectors only when Type is "fargate" (see Validate).
type ComputeConfig struct {
	Type             string            `yaml:"type"` // karpenter | fargate
	InstanceTypes    []string          `yaml:"instanceTypes"`
	CapacityTypes    []string          `yaml:"capacityTypes"`
	MaxCPU           int               `yaml:"maxCpu"`
	MaxMemory        string            `yaml:"maxMemory"`
	Consolidation    bool              `yaml:"consolidation"`
	FargateSelectors []FargateSelector `yaml:"fargateSelectors"`
}

// FargateSelector identifies one namespace selector for a Fargate profile.
type FargateSelector struct {
	Namespace string `yaml:"namespace"`
}

// StorageConfig describes the sandbox workspace storage. UID/GID of 0 are
// replaced with 1000 and an empty ReclaimPolicy defaults to "Retain" by
// Validate.
type StorageConfig struct {
	Type          string `yaml:"type"` // efs
	FilesystemID  string `yaml:"filesystemId"`
	BasePath      string `yaml:"basePath"`
	UID           int    `yaml:"uid"`
	GID           int    `yaml:"gid"`
	ReclaimPolicy string `yaml:"reclaimPolicy"`
}

// SandboxConfig describes the sandbox pod: image, ports, workspace mount,
// resources, security, probes, warm pool, and network policy.
type SandboxConfig struct {
	Image           string            `yaml:"image"`
	Ports           []int             `yaml:"ports"`
	MountPath       string            `yaml:"mountPath"`
	Resources       ResourcesConfig   `yaml:"resources"`
	Env             map[string]string `yaml:"env"`
	SecurityContext SecurityContext   `yaml:"securityContext"`
	Probes          ProbesConfig      `yaml:"probes"`
	WarmPool        WarmPoolConfig    `yaml:"warmPool"`
	NetworkPolicy   NetworkPolicy     `yaml:"networkPolicy"`
}

// ResourcesConfig holds the container resource requests and limits.
type ResourcesConfig struct {
	Requests ResourceValues `yaml:"requests"`
	Limits   ResourceValues `yaml:"limits"`
}

// ResourceValues is a cpu/memory quantity pair (Kubernetes quantity strings).
type ResourceValues struct {
	CPU    string `yaml:"cpu"`
	Memory string `yaml:"memory"`
}

// SecurityContext mirrors the pod security context fields rendered into
// the sandbox template.
type SecurityContext struct {
	RunAsUser    int  `yaml:"runAsUser"`
	RunAsGroup   int  `yaml:"runAsGroup"`
	RunAsNonRoot bool `yaml:"runAsNonRoot"`
}

// ProbesConfig configures the sandbox TCP probes. Port defaults to the
// first sandbox port and StartupFailureThreshold to 30 (see Validate).
type ProbesConfig struct {
	Port                    int `yaml:"port"`
	StartupFailureThreshold int `yaml:"startupFailureThreshold"`
}

// WarmPoolConfig controls the pre-warmed sandbox pool. When Enabled, a
// Size of 0 defaults to 5; TTLMinutes of 0 defaults to 120 (see Validate).
type WarmPoolConfig struct {
	Enabled    bool `yaml:"enabled"`
	Size       int  `yaml:"size"`
	TTLMinutes int  `yaml:"ttlMinutes"`
}

// NetworkPolicy controls sandbox egress/ingress rendered into the template.
type NetworkPolicy struct {
	EgressAllowAll bool  `yaml:"egressAllowAll"`
	IngressPorts   []int `yaml:"ingressPorts"`
}
// Load reads the config file at path, parses it as YAML, validates it
// (which also fills in defaults), and returns the resulting Config.
func Load(path string) (*Config, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("reading config file: %w", err)
	}
	cfg := &Config{}
	if err := yaml.Unmarshal(raw, cfg); err != nil {
		return nil, fmt.Errorf("parsing config file: %w", err)
	}
	if err := Validate(cfg); err != nil {
		return nil, fmt.Errorf("validating config: %w", err)
	}
	return cfg, nil
}

View file

@ -0,0 +1,99 @@
package config
import (
"fmt"
"strings"
)
// Validate checks that all required fields are present and that enum-like
// fields hold valid values, collecting every problem before reporting.
// It also mutates cfg in place to apply defaults: reclaim policy "Retain",
// uid/gid 1000, warm pool size 5 (when enabled) and TTL 120 minutes, and
// probe port/threshold derived from the sandbox ports.
func Validate(cfg *Config) error {
	var problems []string
	add := func(format string, args ...interface{}) {
		problems = append(problems, fmt.Sprintf(format, args...))
	}

	if cfg.Namespace == "" {
		add("namespace is required")
	}

	// Compute validation: requirements depend on the backend type.
	switch cfg.Compute.Type {
	case "karpenter":
		if len(cfg.Compute.InstanceTypes) == 0 {
			add("compute.instanceTypes is required when type is karpenter")
		}
		if len(cfg.Compute.CapacityTypes) == 0 {
			add("compute.capacityTypes is required when type is karpenter")
		}
		if cfg.Compute.MaxCPU <= 0 {
			add("compute.maxCpu must be > 0")
		}
		if cfg.Compute.MaxMemory == "" {
			add("compute.maxMemory is required when type is karpenter")
		}
	case "fargate":
		if len(cfg.Compute.FargateSelectors) == 0 {
			add("compute.fargateSelectors is required when type is fargate")
		}
	case "":
		add("compute.type is required (karpenter or fargate)")
	default:
		add("compute.type must be karpenter or fargate, got %q", cfg.Compute.Type)
	}

	// Storage validation.
	switch {
	case cfg.Storage.Type == "":
		add("storage.type is required")
	case cfg.Storage.Type != "efs":
		add("storage.type must be efs, got %q", cfg.Storage.Type)
	}
	if cfg.Storage.FilesystemID == "" {
		add("storage.filesystemId is required")
	}
	if cfg.Storage.BasePath == "" {
		add("storage.basePath is required")
	}
	switch cfg.Storage.ReclaimPolicy {
	case "":
		cfg.Storage.ReclaimPolicy = "Retain"
	case "Retain", "Delete":
		// valid as-is
	default:
		add("storage.reclaimPolicy must be Retain or Delete, got %q", cfg.Storage.ReclaimPolicy)
	}

	// Storage defaults.
	if cfg.Storage.UID == 0 {
		cfg.Storage.UID = 1000
	}
	if cfg.Storage.GID == 0 {
		cfg.Storage.GID = 1000
	}

	// Sandbox validation.
	if cfg.Sandbox.Image == "" {
		add("sandbox.image is required")
	}
	if len(cfg.Sandbox.Ports) == 0 {
		add("sandbox.ports is required")
	}
	if cfg.Sandbox.MountPath == "" {
		add("sandbox.mountPath is required")
	}

	// Warm pool defaults.
	if cfg.Sandbox.WarmPool.Enabled && cfg.Sandbox.WarmPool.Size == 0 {
		cfg.Sandbox.WarmPool.Size = 5
	}
	if cfg.Sandbox.WarmPool.TTLMinutes == 0 {
		cfg.Sandbox.WarmPool.TTLMinutes = 120
	}

	// Probe defaults: probe the first declared sandbox port.
	if cfg.Sandbox.Probes.Port == 0 && len(cfg.Sandbox.Ports) > 0 {
		cfg.Sandbox.Probes.Port = cfg.Sandbox.Ports[0]
	}
	if cfg.Sandbox.Probes.StartupFailureThreshold == 0 {
		cfg.Sandbox.Probes.StartupFailureThreshold = 30
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("config validation errors:\n - %s", strings.Join(problems, "\n - "))
}

92
internal/kube/apply.go Normal file
View file

@ -0,0 +1,92 @@
package kube
import (
"bytes"
"context"
"fmt"
"io"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/discovery"
"k8s.io/client-go/discovery/cached/memory"
"k8s.io/client-go/restmapper"
yamlserializer "k8s.io/apimachinery/pkg/runtime/serializer/yaml"
)
// ServerSideApply splits a multi-document YAML into individual resources
// and applies each one using server-side apply with the "agentikube" field manager.
//
// Each document is applied as an ApplyPatch (types.ApplyPatchType), so the
// server merges it field-by-field against existing objects. The loop stops
// at the first failing document; earlier documents stay applied.
func (c *Client) ServerSideApply(ctx context.Context, manifests []byte) error {
	decoder := k8syaml.NewYAMLOrJSONDecoder(bytes.NewReader(manifests), 4096)
	// The REST mapper needs discovery to translate GroupVersionKind (from
	// the manifest) into GroupVersionResource (for the dynamic client);
	// the memcache layer avoids repeated discovery round-trips.
	discoveryClient, ok := c.Clientset().Discovery().(*discovery.DiscoveryClient)
	if !ok {
		return fmt.Errorf("failed to get discovery client")
	}
	cachedDiscovery := memory.NewMemCacheClient(discoveryClient)
	mapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery)
	deserializer := yamlserializer.NewDecodingSerializer(unstructured.UnstructuredJSONScheme)
	for {
		var rawObj unstructured.Unstructured
		if err := decoder.Decode(&rawObj); err != nil {
			// io.EOF marks the end of the multi-document stream.
			if err == io.EOF {
				break
			}
			return fmt.Errorf("decoding YAML document: %w", err)
		}
		// Skip empty documents (e.g. trailing "---" separators).
		if len(rawObj.Object) == 0 {
			continue
		}
		// Re-encode to JSON for the patch body.
		rawJSON, err := rawObj.MarshalJSON()
		if err != nil {
			return fmt.Errorf("marshaling to JSON: %w", err)
		}
		// Decode to get the GVK.
		obj := &unstructured.Unstructured{}
		_, gvk, err := deserializer.Decode(rawJSON, nil, obj)
		if err != nil {
			return fmt.Errorf("deserializing object: %w", err)
		}
		// Map GVK to GVR using the REST mapper.
		mapping, err := mapper.RESTMapping(gvk.GroupKind(), gvk.Version)
		if err != nil {
			return fmt.Errorf("mapping GVK %s to GVR: %w", gvk.String(), err)
		}
		gvr := mapping.Resource
		name := obj.GetName()
		namespace := obj.GetNamespace()
		applyOpts := metav1.ApplyOptions{
			FieldManager: "agentikube",
		}
		// Apply using the dynamic client - handle namespaced vs cluster-scoped.
		// NOTE(review): scoping is decided by whether the manifest carries a
		// namespace, not by the RESTMapping scope — a namespaced resource
		// without an explicit namespace would take the cluster-scoped path.
		if namespace != "" {
			_, err = c.Dynamic().Resource(gvr).Namespace(namespace).Patch(
				ctx, name, types.ApplyPatchType, rawJSON, applyOpts.ToPatchOptions(),
			)
		} else {
			_, err = c.Dynamic().Resource(gvr).Patch(
				ctx, name, types.ApplyPatchType, rawJSON, applyOpts.ToPatchOptions(),
			)
		}
		if err != nil {
			return fmt.Errorf("applying %s/%s: %w", gvk.Kind, name, err)
		}
		fmt.Printf("applied %s/%s\n", gvk.Kind, name)
	}
	return nil
}

76
internal/kube/client.go Normal file
View file

@ -0,0 +1,76 @@
package kube
import (
"context"
"fmt"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)
// Client wraps the Kubernetes dynamic client, typed clientset, and REST config.
type Client struct {
	dynamic    dynamic.Interface    // dynamic client for CRDs (SandboxClaim etc.)
	clientset  kubernetes.Interface // typed clientset for core resources
	restConfig *rest.Config         // config both clients were built from
}

// Dynamic returns the dynamic (unstructured) client.
func (c *Client) Dynamic() dynamic.Interface { return c.dynamic }

// Clientset returns the typed Kubernetes clientset.
func (c *Client) Clientset() kubernetes.Interface { return c.clientset }

// RestConfig returns the underlying REST config.
func (c *Client) RestConfig() *rest.Config { return c.restConfig }
// NewClient creates a Kubernetes client using the default kubeconfig loading
// rules (KUBECONFIG env var or ~/.kube/config).
func NewClient() (*Client, error) {
	rules := clientcmd.NewDefaultClientConfigLoadingRules()
	overrides := &clientcmd.ConfigOverrides{}
	cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, overrides).ClientConfig()
	if err != nil {
		return nil, fmt.Errorf("loading kubeconfig: %w", err)
	}
	dyn, err := dynamic.NewForConfig(cfg)
	if err != nil {
		return nil, fmt.Errorf("creating dynamic client: %w", err)
	}
	typed, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return nil, fmt.Errorf("creating clientset: %w", err)
	}
	return &Client{
		dynamic:    dyn,
		clientset:  typed,
		restConfig: cfg,
	}, nil
}
// EnsureNamespace creates the namespace if it does not already exist.
// It is safe against concurrent creators: an AlreadyExists error from
// Create (namespace created between our Get and Create) is treated as
// success.
func (c *Client) EnsureNamespace(ctx context.Context, name string) error {
	_, err := c.clientset.CoreV1().Namespaces().Get(ctx, name, metav1.GetOptions{})
	if err == nil {
		return nil
	}
	if !errors.IsNotFound(err) {
		return fmt.Errorf("checking namespace %q: %w", name, err)
	}
	ns := &corev1.Namespace{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
	}
	if _, err := c.clientset.CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}); err != nil {
		// Another actor may have created the namespace after our check;
		// that still satisfies "ensure".
		if errors.IsAlreadyExists(err) {
			return nil
		}
		return fmt.Errorf("creating namespace %q: %w", name, err)
	}
	return nil
}

24
internal/kube/exec.go Normal file
View file

@ -0,0 +1,24 @@
package kube
import (
"os"
"os/exec"
)
// Exec runs kubectl exec to attach an interactive terminal to the specified
// pod. If command is empty, it defaults to /bin/sh.
func Exec(namespace, podName string, command []string) error {
	shell := command
	if len(shell) == 0 {
		shell = []string{"/bin/sh"}
	}
	kubectlArgs := append([]string{"exec", "-it", "-n", namespace, podName, "--"}, shell...)
	proc := exec.Command("kubectl", kubectlArgs...)
	// Wire the child directly to our terminal for interactive use.
	proc.Stdin = os.Stdin
	proc.Stdout = os.Stdout
	proc.Stderr = os.Stderr
	return proc.Run()
}

81
internal/kube/wait.go Normal file
View file

@ -0,0 +1,81 @@
package kube
import (
"context"
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/watch"
)
// WaitForReady watches a resource in the agentsandbox.dev/v1 group until its
// Ready condition becomes True or the context is cancelled/times out.
// The resource parameter is the plural resource name (e.g. "sandboxclaims", "sandboxwarmpools").
func (c *Client) WaitForReady(ctx context.Context, namespace, resource, name string) error {
	gvr := schema.GroupVersionResource{
		Group:    "agentsandbox.dev",
		Version:  "v1",
		Resource: resource,
	}
	// A field selector narrows the watch to the single named object; the
	// initial ADDED event delivers its current state.
	w, err := c.Dynamic().Resource(gvr).Namespace(namespace).Watch(ctx, metav1.ListOptions{
		FieldSelector: fmt.Sprintf("metadata.name=%s", name),
	})
	if err != nil {
		return fmt.Errorf("watching %s %s/%s: %w", resource, namespace, name, err)
	}
	defer w.Stop()
	events := w.ResultChan()
	for {
		select {
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for %s %s/%s to become ready", resource, namespace, name)
		case ev, open := <-events:
			if !open {
				return fmt.Errorf("watch channel closed for %s %s/%s", resource, namespace, name)
			}
			if ev.Type == watch.Error {
				return fmt.Errorf("watch error for %s %s/%s", resource, namespace, name)
			}
			u, isUnstructured := ev.Object.(*unstructured.Unstructured)
			if isUnstructured && isReady(u) {
				return nil
			}
		}
	}
}
// isReady reports whether obj carries a condition with type=Ready and
// status=True in its status.conditions list.
func isReady(obj *unstructured.Unstructured) bool {
	statusMap, found, err := unstructured.NestedMap(obj.Object, "status")
	if err != nil || !found {
		return false
	}
	conds, found, err := unstructured.NestedSlice(statusMap, "conditions")
	if err != nil || !found {
		return false
	}
	for _, raw := range conds {
		cond, ok := raw.(map[string]interface{})
		if !ok {
			continue
		}
		condType, _ := cond["type"].(string)
		condStatus, _ := cond["status"].(string)
		if condType == "Ready" && condStatus == "True" {
			return true
		}
	}
	return false
}

View file

@ -0,0 +1,51 @@
package manifest
import (
"bytes"
"fmt"
"text/template"
"github.com/rathi/agentikube/internal/config"
)
// Generate renders all applicable Kubernetes manifests from the embedded
// templates using the provided configuration, joined into one
// multi-document YAML stream. Which templates are rendered depends on the
// compute type (Karpenter resources) and whether the warm pool is enabled.
func Generate(cfg *config.Config) ([]byte, error) {
	tmpl, err := template.ParseFS(templateFS, "templates/*.yaml.tmpl")
	if err != nil {
		return nil, fmt.Errorf("parsing templates: %w", err)
	}
	// Templates rendered for every configuration.
	selected := []string{
		"namespace.yaml.tmpl",
		"storageclass-efs.yaml.tmpl",
		"sandbox-template.yaml.tmpl",
	}
	if cfg.Compute.Type == "karpenter" {
		selected = append(selected,
			"karpenter-nodepool.yaml.tmpl",
			"karpenter-ec2nodeclass.yaml.tmpl",
		)
	}
	if cfg.Sandbox.WarmPool.Enabled {
		selected = append(selected, "warm-pool.yaml.tmpl")
	}
	var buf bytes.Buffer
	for idx, tmplName := range selected {
		// Separate documents with the YAML document marker.
		if idx > 0 {
			buf.WriteString("---\n")
		}
		if err := tmpl.ExecuteTemplate(&buf, tmplName, cfg); err != nil {
			return nil, fmt.Errorf("rendering template %s: %w", tmplName, err)
		}
	}
	return buf.Bytes(), nil
}

View file

@ -0,0 +1,6 @@
package manifest
import "embed"
// templateFS holds the embedded YAML manifest templates under templates/,
// rendered by Generate.
//go:embed templates/*.yaml.tmpl
var templateFS embed.FS

View file

@ -0,0 +1,14 @@
# EC2NodeClass for sandbox nodes: AL2023 AMIs, with subnets, security
# groups, and the node IAM role discovered via karpenter.sh/discovery tags
# derived from the configured namespace ("<namespace>-cluster").
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
  name: sandbox-nodes
spec:
  amiSelectorTerms:
    - alias: "al2023@latest"
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: "{{ .Namespace }}-cluster"
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: "{{ .Namespace }}-cluster"
  role: "KarpenterNodeRole-{{ .Namespace }}-cluster"

View file

@ -0,0 +1,33 @@
# Karpenter NodePool for sandboxes: instance and capacity types come from
# the compute config, architecture is pinned to amd64, and the limits
# section caps total provisioned CPU/memory.
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: sandbox-pool
spec:
  template:
    spec:
      requirements:
        - key: node.kubernetes.io/instance-type
          operator: In
          values:
          {{- range .Compute.InstanceTypes }}
            - {{ . }}
          {{- end }}
        - key: karpenter.sh/capacity-type
          operator: In
          values:
          {{- range .Compute.CapacityTypes }}
            - {{ . }}
          {{- end }}
        - key: kubernetes.io/arch
          operator: In
          values:
            - amd64
      nodeClassRef:
        name: sandbox-nodes
        group: karpenter.k8s.aws
        kind: EC2NodeClass
  limits:
    cpu: {{ .Compute.MaxCPU }}
    memory: {{ .Compute.MaxMemory }}
  disruption:
    # Consolidation toggles between reclaiming underutilized nodes and
    # only removing empty ones.
    consolidationPolicy: {{ if .Compute.Consolidation }}WhenEmptyOrUnderutilized{{ else }}WhenEmpty{{ end }}

View file

@ -0,0 +1,4 @@
# Target namespace for all sandbox resources.
apiVersion: v1
kind: Namespace
metadata:
  name: {{ .Namespace }}

View file

@ -0,0 +1,66 @@
# SandboxTemplate: the pod spec, per-sandbox EFS workspace PVC, and
# network policy used for every sandbox created from this template.
apiVersion: agentsandbox.dev/v1
kind: SandboxTemplate
metadata:
  name: sandbox-template
  namespace: {{ .Namespace }}
spec:
  template:
    spec:
      containers:
        - name: sandbox
          image: {{ .Sandbox.Image }}
          ports:
          {{- range .Sandbox.Ports }}
            - containerPort: {{ . }}
          {{- end }}
          resources:
            requests:
              cpu: {{ .Sandbox.Resources.Requests.CPU }}
              memory: {{ .Sandbox.Resources.Requests.Memory }}
            limits:
              cpu: {{ .Sandbox.Resources.Limits.CPU }}
              memory: {{ .Sandbox.Resources.Limits.Memory }}
          securityContext:
            runAsUser: {{ .Sandbox.SecurityContext.RunAsUser }}
            runAsGroup: {{ .Sandbox.SecurityContext.RunAsGroup }}
            runAsNonRoot: {{ .Sandbox.SecurityContext.RunAsNonRoot }}
          env:
          {{- range $key, $value := .Sandbox.Env }}
            - name: {{ $key }}
              value: "{{ $value }}"
          {{- end }}
          # Both probes check the configured TCP port; the startup probe's
          # failure threshold bounds how long the sandbox may take to boot.
          startupProbe:
            tcpSocket:
              port: {{ .Sandbox.Probes.Port }}
            failureThreshold: {{ .Sandbox.Probes.StartupFailureThreshold }}
            periodSeconds: 10
          readinessProbe:
            tcpSocket:
              port: {{ .Sandbox.Probes.Port }}
            periodSeconds: 10
          volumeMounts:
            - name: workspace
              mountPath: {{ .Sandbox.MountPath }}
  # Each sandbox gets its own EFS-backed workspace volume.
  volumeClaimTemplates:
    - metadata:
        name: workspace
      spec:
        accessModes:
          - ReadWriteMany
        storageClassName: efs-sandbox
        resources:
          requests:
            storage: "10Gi"
  networkPolicy:
    egress:
    {{- if .Sandbox.NetworkPolicy.EgressAllowAll }}
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0
    {{- end }}
    ingress:
    {{- range .Sandbox.NetworkPolicy.IngressPorts }}
      - ports:
          - port: {{ . }}
            protocol: TCP
    {{- end }}

View file

@ -0,0 +1,14 @@
# EFS access-point StorageClass used by the sandbox workspace PVCs.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efs-sandbox
provisioner: efs.csi.aws.com
parameters:
  provisioningMode: efs-ap
  fileSystemId: {{ .Storage.FilesystemID }}
  directoryPerms: "755"
  uid: "{{ .Storage.UID }}"
  gid: "{{ .Storage.GID }}"
  basePath: {{ .Storage.BasePath }}
reclaimPolicy: {{ .Storage.ReclaimPolicy }}
volumeBindingMode: Immediate

View file

@ -0,0 +1,10 @@
# Warm pool of pre-started sandboxes drawn from sandbox-template; size
# and TTL come from the warmPool config.
apiVersion: agentsandbox.dev/v1
kind: SandboxWarmPool
metadata:
  name: sandbox-warm-pool
  namespace: {{ .Namespace }}
spec:
  templateRef:
    name: sandbox-template
  replicas: {{ .Sandbox.WarmPool.Size }}
  ttlMinutes: {{ .Sandbox.WarmPool.TTLMinutes }}