diff --git a/internal/service/vmservice/vm.go b/internal/service/vmservice/vm.go
index b51b03bd..7c72474c 100644
--- a/internal/service/vmservice/vm.go
+++ b/internal/service/vmservice/vm.go
@@ -33,6 +33,7 @@ import (
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/internal/service/scheduler"
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/internal/service/taskservice"
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox"
+	"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox/goproxmox"
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/scope"
 )
 
@@ -91,6 +92,10 @@ func ReconcileVM(ctx context.Context, scope *scope.MachineScope) (infrav1alpha1.
 		return vm, err
 	}
 
+	if requeue, err := checkCloudInitStatus(ctx, scope); err != nil || requeue {
+		return vm, err
+	}
+
 	// if the root machine is ready, we can assume that the VM is ready as well.
 	// unmount the cloud-init iso if it is still mounted.
 	if scope.Machine.Status.BootstrapReady && scope.Machine.Status.NodeRef != nil {
@@ -103,6 +108,29 @@ func ReconcileVM(ctx context.Context, scope *scope.MachineScope) (infrav1alpha1.
 	return vm, nil
 }
 
+// checkCloudInitStatus queries the Proxmox client for the cloud-init state of the machine's VM.
+// It returns requeue=true while the VM is not running or cloud-init is still running.
+// On ErrCloudInitFailed it records the failure condition, message and reason before returning the error.
+func checkCloudInitStatus(ctx context.Context, machineScope *scope.MachineScope) (requeue bool, err error) {
+	if !machineScope.VirtualMachine.IsRunning() {
+		// the vm is not running: requeue instead of querying cloud-init.
+		return true, nil
+	}
+
+	running, err := machineScope.InfraCluster.ProxmoxClient.CloudInitStatus(ctx, machineScope.VirtualMachine)
+	if err != nil {
+		// errors.Is takes the inspected error first and the sentinel second.
+		if errors.Is(err, goproxmox.ErrCloudInitFailed) {
+			conditions.MarkFalse(machineScope.ProxmoxMachine, infrav1alpha1.VMProvisionedCondition, infrav1alpha1.VMProvisionFailedReason, clusterv1.ConditionSeverityError, "%s", err.Error())
+			machineScope.SetFailureMessage(err)
+			machineScope.SetFailureReason(capierrors.MachineStatusError("BootstrapFailed"))
+		}
+		return false, err
+	}
+
+	return running, nil
+}
+
 // ensureVirtualMachine creates a Proxmox VM if it doesn't exist and updates the given MachineScope.
 func ensureVirtualMachine(ctx context.Context, machineScope *scope.MachineScope) (requeue bool, err error) {
 	// if there's an associated task, requeue.
diff --git a/internal/service/vmservice/vm_test.go b/internal/service/vmservice/vm_test.go
index 5ba7c188..c03fb5c9 100644
--- a/internal/service/vmservice/vm_test.go
+++ b/internal/service/vmservice/vm_test.go
@@ -23,10 +23,12 @@ import (
 
 	"github.com/stretchr/testify/require"
 	"k8s.io/utils/ptr"
+	capierrors "sigs.k8s.io/cluster-api/errors"
 
 	infrav1alpha1 "github.com/ionos-cloud/cluster-api-provider-proxmox/api/v1alpha1"
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/internal/service/scheduler"
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox"
+	"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox/goproxmox"
 	"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/scope"
 )
 
@@ -36,8 +38,10 @@ func TestReconcileVM_EverythingReady(t *testing.T) {
 	machineScope.SetVirtualMachineID(int64(vm.VMID))
 	machineScope.ProxmoxMachine.Status.IPAddresses = map[string]infrav1alpha1.IPAddress{infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"}}
 	machineScope.ProxmoxMachine.Status.BootstrapDataProvided = ptr.To(true)
+	machineScope.ProxmoxMachine.Status.Ready = true
 
proxmoxClient.EXPECT().GetVM(context.Background(), "node1", int64(123)).Return(vm, nil).Once() + proxmoxClient.EXPECT().CloudInitStatus(context.Background(), vm).Return(false, nil).Once() result, err := ReconcileVM(context.Background(), machineScope) require.NoError(t, err) @@ -308,3 +312,36 @@ func TestReconcileDisks_UnmountCloudInitISO(t *testing.T) { require.NoError(t, unmountCloudInitISO(context.Background(), machineScope)) } + +func TestReconcileVM_CloudInitFailed(t *testing.T) { + machineScope, proxmoxClient, _ := setupReconcilerTest(t) + vm := newRunningVM() + machineScope.SetVirtualMachineID(int64(vm.VMID)) + machineScope.ProxmoxMachine.Status.IPAddresses = map[string]infrav1alpha1.IPAddress{infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"}} + machineScope.ProxmoxMachine.Status.BootstrapDataProvided = ptr.To(true) + machineScope.ProxmoxMachine.Status.Ready = true + + proxmoxClient.EXPECT().GetVM(context.Background(), "node1", int64(123)).Return(vm, nil).Once() + proxmoxClient.EXPECT().CloudInitStatus(context.Background(), vm).Return(false, goproxmox.ErrCloudInitFailed).Once() + + _, err := ReconcileVM(context.Background(), machineScope) + require.Error(t, err, "unknown error") + require.Equal(t, machineScope.ProxmoxMachine.Status.FailureReason, ptr.To(capierrors.MachineStatusError("BootstrapFailed"))) + require.Equal(t, machineScope.ProxmoxMachine.Status.FailureMessage, ptr.To("cloud-init failed execution")) +} + +func TestReconcileVM_CloudInitRunning(t *testing.T) { + machineScope, proxmoxClient, _ := setupReconcilerTest(t) + vm := newRunningVM() + machineScope.SetVirtualMachineID(int64(vm.VMID)) + machineScope.ProxmoxMachine.Status.IPAddresses = map[string]infrav1alpha1.IPAddress{infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"}} + machineScope.ProxmoxMachine.Status.BootstrapDataProvided = ptr.To(true) + machineScope.ProxmoxMachine.Status.Ready = true + + proxmoxClient.EXPECT().GetVM(context.Background(), "node1", int64(123)).Return(vm, 
nil).Once() + proxmoxClient.EXPECT().CloudInitStatus(context.Background(), vm).Return(true, nil).Once() + + result, err := ReconcileVM(context.Background(), machineScope) + require.NoError(t, err) + require.Equal(t, infrav1alpha1.VirtualMachineStatePending, result.State) +} diff --git a/pkg/proxmox/client.go b/pkg/proxmox/client.go index 63e4e874..96c57261 100644 --- a/pkg/proxmox/client.go +++ b/pkg/proxmox/client.go @@ -48,4 +48,6 @@ type Client interface { TagVM(ctx context.Context, vm *proxmox.VirtualMachine, tag string) (*proxmox.Task, error) UnmountCloudInitISO(ctx context.Context, vm *proxmox.VirtualMachine, device string) error + + CloudInitStatus(ctx context.Context, vm *proxmox.VirtualMachine) (bool, error) } diff --git a/pkg/proxmox/goproxmox/api_client.go b/pkg/proxmox/goproxmox/api_client.go index 98d50ba1..cf58a855 100644 --- a/pkg/proxmox/goproxmox/api_client.go +++ b/pkg/proxmox/goproxmox/api_client.go @@ -21,9 +21,11 @@ import ( "context" "fmt" "net/url" + "strings" "github.com/go-logr/logr" "github.com/luthermonson/go-proxmox" + "github.com/pkg/errors" capmox "github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox" ) @@ -258,3 +260,29 @@ func (c *APIClient) UnmountCloudInitISO(ctx context.Context, vm *proxmox.Virtual } return err } + +// CloudInitStatus returns the cloud-init status of the VM. 
+func (c *APIClient) CloudInitStatus(ctx context.Context, vm *proxmox.VirtualMachine) (running bool, err error) {
+	if err := vm.WaitForAgent(ctx, 5); err != nil {
+		return false, errors.Wrap(err, "error waiting for agent")
+	}
+	// execute `cloud-init status` via the agent.
+	pid, err := vm.AgentExec(ctx, []string{"cloud-init", "status"}, "")
+	if err != nil {
+		return false, errors.Wrap(err, "unable to get cloud-init status")
+	}
+	// wait for the command to exit and collect its exit code and output.
+	status, err := vm.WaitForAgentExecExit(ctx, pid, 2)
+	if err != nil {
+		return false, errors.Wrap(err, "unable to wait for agent exec")
+	}
+	// exit code 0 with "running" in the output means cloud-init has not finished yet.
+	if status.Exited == 1 && status.ExitCode == 0 && strings.Contains(status.OutData, "running") {
+		return true, nil
+	}
+	if status.Exited == 1 && status.ExitCode != 0 {
+		return false, ErrCloudInitFailed
+	}
+
+	return false, nil
+}
diff --git a/pkg/proxmox/goproxmox/api_client_test.go b/pkg/proxmox/goproxmox/api_client_test.go
index 1babd08a..62c26d8f 100644
--- a/pkg/proxmox/goproxmox/api_client_test.go
+++ b/pkg/proxmox/goproxmox/api_client_test.go
@@ -18,6 +18,7 @@ package goproxmox
 
 import (
 	"context"
+	"fmt"
 	"net/http"
 	"testing"
 
@@ -150,3 +151,115 @@ func TestProxmoxAPIClient_GetReservableMemoryBytes(t *testing.T) {
 		})
 	}
 }
+
+func TestProxmoxAPIClient_CloudInitStatus(t *testing.T) {
+	tests := []struct {
+		name     string
+		node     string  // node name
+		vmid     int64   // vmid
+		pid      float64 // pid of agent
+		exited   int     // exited state
+		exitcode int     // exitcode
+		outData  string  // out-data
+		running  bool    // expected running state
+		err      error   // expected error
+	}{
+		{
+			name:     "cloud-init success",
+			node:     "pve",
+			vmid:     1111,
+			pid:      12234,
+			exited:   1,
+			exitcode: 0,
+			outData:  "status: done\n",
+			running:  false,
+			err:      nil,
+		},
+		{
+			name:     "cloud-init running",
+			node:     "pve",
+			vmid:     1111,
+			pid:      12234,
+			exited:   1,
+			exitcode: 0,
+			outData:  "status: running\n",
+			running:  true,
+			err:      nil,
+		},
+		{
+			name:     "cloud-init failed",
+			node:     "pve",
+			vmid:     1111,
+			pid:      12234,
+			exited:   1,
+			exitcode: 1,
+			outData:  "status: error\n",
+			running:  false,
+			err:      ErrCloudInitFailed,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			client := newTestClient(t)
+
+			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/status`, test.node),
+				newJSONResponder(200, proxmox.Node{Name: "pve"}))
+
+			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/status/current`, test.node, test.vmid),
+				newJSONResponder(200, proxmox.VirtualMachine{
+					VMID: proxmox.StringOrUint64(test.vmid),
+					Name: "legit-worker",
+					Node: test.node,
+				}))
+
+			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/config`, test.node, test.vmid),
+				newJSONResponder(200, proxmox.VirtualMachineConfig{
+					Name: "legit-worker",
+				}))
+
+			vm, err := client.GetVM(context.Background(), test.node, test.vmid)
+			require.NoError(t, err)
+			require.NotNil(t, vm)
+
+			// WaitForAgent mock
+			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/agent/get-osinfo`, vm.Node, vm.VMID),
+				newJSONResponder(200,
+					map[string]*proxmox.AgentOsInfo{
+						"result": {
+							ID:            "ubuntu",
+							VersionID:     "22.04",
+							Machine:       "x86_64",
+							KernelRelease: "5.15.0-89-generic",
+							KernelVersion: "#99-Ubuntu SMP Mon Oct 30 20:42:41 UTC 2023",
+							Name:          "Ubuntu",
+							Version:       "22.04.3 LTS (Jammy Jellyfish)",
+							PrettyName:    "Ubuntu 22.04.3 LTS",
+						},
+					},
+				))
+
+			// AgentExec mock
+			httpmock.RegisterResponder(http.MethodPost, fmt.Sprintf(`=~/nodes/%s/qemu/%d/agent/exec\z`, vm.Node, vm.VMID),
+				newJSONResponder(200,
+					map[string]interface{}{
+						"pid": test.pid,
+					},
+				))
+
+			// AgentExecStatus mock
+			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/agent/exec-status\?pid=%v`, vm.Node, vm.VMID, test.pid),
+				newJSONResponder(200,
+					&proxmox.AgentExecStatus{
+						Exited:   test.exited,
+						ExitCode: test.exitcode,
+						OutData:  test.outData,
+					},
+				))
+
+			running, err := client.CloudInitStatus(context.Background(), vm)
+			require.Equal(t, test.err, err)
+			require.Equal(t, test.running, running)
+		})
+	}
+}
diff --git a/pkg/proxmox/goproxmox/errors.go b/pkg/proxmox/goproxmox/errors.go
new file mode 100644
index 00000000..164cabef
--- /dev/null
+++ b/pkg/proxmox/goproxmox/errors.go
@@ -0,0 +1,8 @@
+package goproxmox
+
+import "github.com/pkg/errors"
+
+var (
+	// ErrCloudInitFailed is returned when cloud-init failed execution.
+	ErrCloudInitFailed = errors.New("cloud-init failed execution")
+)
diff --git a/pkg/proxmox/proxmoxtest/mock_client.go b/pkg/proxmox/proxmoxtest/mock_client.go
index 72df7a1f..0b6005f7 100644
--- a/pkg/proxmox/proxmoxtest/mock_client.go
+++ b/pkg/proxmox/proxmoxtest/mock_client.go
@@ -694,6 +694,60 @@ func (_c *MockClient_UnmountCloudInitISO_Call) RunAndReturn(run func(context.Con
 	return _c
 }
 
+// CloudInitStatus provides a mock function with given fields: ctx, vm
+func (_m *MockClient) CloudInitStatus(ctx context.Context, vm *go_proxmox.VirtualMachine) (bool, error) {
+	ret := _m.Called(ctx, vm)
+
+	var r0 bool
+	var r1 error
+	if rf, ok := ret.Get(0).(func(context.Context, *go_proxmox.VirtualMachine) (bool, error)); ok {
+		return rf(ctx, vm)
+	}
+	if rf, ok := ret.Get(0).(func(context.Context, *go_proxmox.VirtualMachine) bool); ok {
+		r0 = rf(ctx, vm)
+	} else {
+		if ret.Get(0) != nil {
+			r0 = ret.Get(0).(bool)
+		}
+	}
+
+	if rf, ok := ret.Get(1).(func(context.Context, *go_proxmox.VirtualMachine) error); ok {
+		r1 = rf(ctx, vm)
+	} else {
+		r1 = ret.Error(1)
+	}
+	return r0, r1
+}
+
+// MockClient_CloudInitStatus_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'CloudInitStatus'
+type MockClient_CloudInitStatus_Call struct {
+	*mock.Call
+}
+
+// CloudInitStatus is a helper method to define mock.On call
+//   - ctx context.Context
+//   - vm *go_proxmox.VirtualMachine
+func (_e *MockClient_Expecter) CloudInitStatus(ctx interface{}, vm interface{}) *MockClient_CloudInitStatus_Call {
+	return &MockClient_CloudInitStatus_Call{Call: _e.mock.On("CloudInitStatus", ctx, vm)}
+}
+
+func (_c *MockClient_CloudInitStatus_Call) Run(run func(ctx context.Context, vm *go_proxmox.VirtualMachine)) *MockClient_CloudInitStatus_Call {
+	_c.Call.Run(func(args mock.Arguments) {
+		run(args[0].(context.Context), args[1].(*go_proxmox.VirtualMachine))
+	})
+	return _c
+}
+
+func (_c *MockClient_CloudInitStatus_Call) Return(_a0 bool, _a1 error) *MockClient_CloudInitStatus_Call {
+	_c.Call.Return(_a0, _a1)
+	return _c
+}
+
+func (_c *MockClient_CloudInitStatus_Call) RunAndReturn(run func(context.Context, *go_proxmox.VirtualMachine) (bool, error)) *MockClient_CloudInitStatus_Call {
+	_c.Call.Return(run)
+	return _c
+}
+
 // NewMockClient creates a new instance of MockClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
 // The first argument is typically a *testing.T value.
 func NewMockClient(t interface {