Skip to content

Commit

Permalink
Check cloud-init status as part of machine provisioning (#166)
Browse files Browse the repository at this point in the history
* Check cloud-init status and throw error state if cloud-init fails
  • Loading branch information
mcbenjemaa authored Apr 19, 2024
1 parent f26d055 commit d8c0929
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 0 deletions.
26 changes: 26 additions & 0 deletions internal/service/vmservice/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/ionos-cloud/cluster-api-provider-proxmox/internal/service/scheduler"
"github.com/ionos-cloud/cluster-api-provider-proxmox/internal/service/taskservice"
"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox"
"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox/goproxmox"
"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/scope"
)

Expand Down Expand Up @@ -91,6 +92,10 @@ func ReconcileVM(ctx context.Context, scope *scope.MachineScope) (infrav1alpha1.
return vm, err
}

if requeue, err := checkCloudInitStatus(ctx, scope); err != nil || requeue {
return vm, err
}

// if the root machine is ready, we can assume that the VM is ready as well.
// unmount the cloud-init iso if it is still mounted.
if scope.Machine.Status.BootstrapReady && scope.Machine.Status.NodeRef != nil {
Expand All @@ -103,6 +108,27 @@ func ReconcileVM(ctx context.Context, scope *scope.MachineScope) (infrav1alpha1.
return vm, nil
}

// checkCloudInitStatus asks the Proxmox client for the cloud-init status of
// the machine's VM and translates it into a reconcile decision.
//
// It returns requeue=true when the VM is not running yet or when cloud-init is
// still running. When the client reports goproxmox.ErrCloudInitFailed (directly
// or wrapped), the ProxmoxMachine is marked as failed: the VMProvisioned
// condition is set to false and the failure reason/message are recorded. Any
// error from the client is returned to the caller with requeue=false.
func checkCloudInitStatus(ctx context.Context, machineScope *scope.MachineScope) (requeue bool, err error) {
	if !machineScope.VirtualMachine.IsRunning() {
		// skip if the vm is not running.
		return true, nil
	}

	running, err := machineScope.InfraCluster.ProxmoxClient.CloudInitStatus(ctx, machineScope.VirtualMachine)
	if err != nil {
		// errors.Is takes (err, target); with the arguments swapped a wrapped
		// ErrCloudInitFailed would never be recognised.
		if errors.Is(err, goproxmox.ErrCloudInitFailed) {
			conditions.MarkFalse(machineScope.ProxmoxMachine, infrav1alpha1.VMProvisionedCondition, infrav1alpha1.VMProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error())
			machineScope.SetFailureMessage(err)
			machineScope.SetFailureReason(capierrors.MachineStatusError("BootstrapFailed"))
		}
		return false, err
	}

	// cloud-init still running means we have to come back later.
	return running, nil
}

// ensureVirtualMachine creates a Proxmox VM if it doesn't exist and updates the given MachineScope.
func ensureVirtualMachine(ctx context.Context, machineScope *scope.MachineScope) (requeue bool, err error) {
// if there's an associated task, requeue.
Expand Down
37 changes: 37 additions & 0 deletions internal/service/vmservice/vm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import (

"github.com/stretchr/testify/require"
"k8s.io/utils/ptr"
capierrors "sigs.k8s.io/cluster-api/errors"

infrav1alpha1 "github.com/ionos-cloud/cluster-api-provider-proxmox/api/v1alpha1"
"github.com/ionos-cloud/cluster-api-provider-proxmox/internal/service/scheduler"
"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox"
"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox/goproxmox"
"github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/scope"
)

Expand All @@ -36,8 +38,10 @@ func TestReconcileVM_EverythingReady(t *testing.T) {
machineScope.SetVirtualMachineID(int64(vm.VMID))
machineScope.ProxmoxMachine.Status.IPAddresses = map[string]infrav1alpha1.IPAddress{infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"}}
machineScope.ProxmoxMachine.Status.BootstrapDataProvided = ptr.To(true)
machineScope.ProxmoxMachine.Status.Ready = true

proxmoxClient.EXPECT().GetVM(context.Background(), "node1", int64(123)).Return(vm, nil).Once()
proxmoxClient.EXPECT().CloudInitStatus(context.Background(), vm).Return(false, nil).Once()

result, err := ReconcileVM(context.Background(), machineScope)
require.NoError(t, err)
Expand Down Expand Up @@ -308,3 +312,36 @@ func TestReconcileDisks_UnmountCloudInitISO(t *testing.T) {

require.NoError(t, unmountCloudInitISO(context.Background(), machineScope))
}

// TestReconcileVM_CloudInitFailed checks that ReconcileVM surfaces
// goproxmox.ErrCloudInitFailed from the client and records the failure reason
// and message on the ProxmoxMachine status.
func TestReconcileVM_CloudInitFailed(t *testing.T) {
	machineScope, proxmoxClient, _ := setupReconcilerTest(t)
	vm := newRunningVM()
	machineScope.SetVirtualMachineID(int64(vm.VMID))
	machineScope.ProxmoxMachine.Status.IPAddresses = map[string]infrav1alpha1.IPAddress{infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"}}
	machineScope.ProxmoxMachine.Status.BootstrapDataProvided = ptr.To(true)
	machineScope.ProxmoxMachine.Status.Ready = true

	proxmoxClient.EXPECT().GetVM(context.Background(), "node1", int64(123)).Return(vm, nil).Once()
	proxmoxClient.EXPECT().CloudInitStatus(context.Background(), vm).Return(false, goproxmox.ErrCloudInitFailed).Once()

	_, err := ReconcileVM(context.Background(), machineScope)
	// Assert on the specific sentinel rather than on "some error": the extra
	// argument of require.Error is only a failure message, not an expectation.
	require.ErrorIs(t, err, goproxmox.ErrCloudInitFailed)
	// testify expects (expected, actual) so failure output is labelled correctly.
	require.Equal(t, ptr.To(capierrors.MachineStatusError("BootstrapFailed")), machineScope.ProxmoxMachine.Status.FailureReason)
	require.Equal(t, ptr.To("cloud-init failed execution"), machineScope.ProxmoxMachine.Status.FailureMessage)
}

// TestReconcileVM_CloudInitRunning checks that ReconcileVM reports the VM as
// pending, without an error, while the client says cloud-init is still running.
func TestReconcileVM_CloudInitRunning(t *testing.T) {
	ctx := context.Background()

	machineScope, proxmoxClient, _ := setupReconcilerTest(t)
	runningVM := newRunningVM()
	machineScope.SetVirtualMachineID(int64(runningVM.VMID))

	// Pre-populate the machine status before reconciling.
	status := &machineScope.ProxmoxMachine.Status
	status.IPAddresses = map[string]infrav1alpha1.IPAddress{
		infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"},
	}
	status.BootstrapDataProvided = ptr.To(true)
	status.Ready = true

	proxmoxClient.EXPECT().GetVM(ctx, "node1", int64(123)).Return(runningVM, nil).Once()
	proxmoxClient.EXPECT().CloudInitStatus(ctx, runningVM).Return(true, nil).Once()

	got, err := ReconcileVM(ctx, machineScope)
	require.NoError(t, err)
	require.Equal(t, infrav1alpha1.VirtualMachineStatePending, got.State)
}
2 changes: 2 additions & 0 deletions pkg/proxmox/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,6 @@ type Client interface {
TagVM(ctx context.Context, vm *proxmox.VirtualMachine, tag string) (*proxmox.Task, error)

UnmountCloudInitISO(ctx context.Context, vm *proxmox.VirtualMachine, device string) error

CloudInitStatus(ctx context.Context, vm *proxmox.VirtualMachine) (bool, error)
}
28 changes: 28 additions & 0 deletions pkg/proxmox/goproxmox/api_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ import (
"context"
"fmt"
"net/url"
"strings"

"github.com/go-logr/logr"
"github.com/luthermonson/go-proxmox"
"github.com/pkg/errors"

capmox "github.com/ionos-cloud/cluster-api-provider-proxmox/pkg/proxmox"
)
Expand Down Expand Up @@ -258,3 +260,29 @@ func (c *APIClient) UnmountCloudInitISO(ctx context.Context, vm *proxmox.Virtual
}
return err
}

// CloudInitStatus reports whether cloud-init is still running inside the VM.
//
// It waits for the guest agent, runs `cloud-init status` through it and
// inspects the result:
//   - exit code 0 and output containing "running": (true, nil)
//   - non-zero exit code: (false, ErrCloudInitFailed)
//   - anything else, including a command that has not exited: (false, nil)
//
// Failures while talking to the agent are returned wrapped with context.
func (c *APIClient) CloudInitStatus(ctx context.Context, vm *proxmox.VirtualMachine) (running bool, err error) {
	// NOTE(review): the numeric arguments 5 and 2 are presumably timeouts in
	// seconds — confirm against the go-proxmox API.
	if waitErr := vm.WaitForAgent(ctx, 5); waitErr != nil {
		return false, errors.Wrap(waitErr, "error waiting for agent")
	}

	pid, execErr := vm.AgentExec(ctx, []string{"cloud-init", "status"}, "")
	if execErr != nil {
		return false, errors.Wrap(execErr, "unable to get cloud-init status")
	}

	status, exitErr := vm.WaitForAgentExecExit(ctx, pid, 2)
	if exitErr != nil {
		return false, errors.Wrap(exitErr, "unable to wait for agent exec")
	}

	switch {
	case status.Exited != 1:
		// The command has not been reported as exited; nothing to conclude.
		return false, nil
	case status.ExitCode != 0:
		return false, ErrCloudInitFailed
	default:
		return strings.Contains(status.OutData, "running"), nil
	}
}
113 changes: 113 additions & 0 deletions pkg/proxmox/goproxmox/api_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package goproxmox

import (
"context"
"fmt"
"net/http"
"testing"

Expand Down Expand Up @@ -150,3 +151,115 @@ func TestProxmoxAPIClient_GetReservableMemoryBytes(t *testing.T) {
})
}
}

// TestProxmoxAPIClient_CloudInitStatus drives APIClient.CloudInitStatus against
// mocked Proxmox HTTP endpoints and checks the returned running flag and error
// for a finished, a still-running and a failed cloud-init execution.
func TestProxmoxAPIClient_CloudInitStatus(t *testing.T) {
	tests := []struct {
		name     string
		node     string  // node name
		vmid     int64   // vmid
		pid      float64 // pid of agent
		exited   int     // exited state
		exitcode int     // exitcode
		outData  string  // out-data
		running  bool    // expected running state
		err      error   // expected error
	}{
		{
			name:     "cloud-init success",
			node:     "pve",
			vmid:     1111,
			pid:      12234,
			exited:   1,
			exitcode: 0,
			outData:  "status: done\n",
			running:  false,
			err:      nil,
		},
		{
			name:     "cloud-init running",
			node:     "pve",
			vmid:     1111,
			pid:      12234,
			exited:   1,
			exitcode: 0,
			outData:  "status: running\n",
			running:  true,
			err:      nil,
		},
		{
			name:     "cloud-init failed",
			node:     "pve",
			vmid:     1111,
			pid:      12234,
			exited:   1,
			exitcode: 1,
			outData:  "status: error\n",
			running:  false,
			err:      ErrCloudInitFailed,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			client := newTestClient(t)

			// Endpoints needed by client.GetVM to build the VM object.
			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/status`, test.node),
				newJSONResponder(200, proxmox.Node{Name: "pve"}))

			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/status/current`, test.node, test.vmid),
				newJSONResponder(200, proxmox.VirtualMachine{
					VMID: proxmox.StringOrUint64(test.vmid),
					Name: "legit-worker",
					Node: test.node,
				}))

			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/config`, test.node, test.vmid),
				newJSONResponder(200, proxmox.VirtualMachineConfig{
					Name: "legit-worker",
				}))

			vm, err := client.GetVM(context.Background(), test.node, test.vmid)
			require.NoError(t, err)
			require.NotNil(t, vm)

			// WaitForAgent mock
			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/agent/get-osinfo`, vm.Node, vm.VMID),
				newJSONResponder(200,
					map[string]*proxmox.AgentOsInfo{
						"result": {
							ID:            "ubuntu",
							VersionID:     "22.04",
							Machine:       "x86_64",
							KernelRelease: "5.15.0-89-generic",
							KernelVersion: "#99-Ubuntu SMP Mon Oct 30 20:42:41 UTC 2023",
							Name:          "Ubuntu",
							Version:       "22.04.3 LTS (Jammy Jellyfish)",
							PrettyName:    "Ubuntu 22.04.3 LTS",
						},
					},
				))

			// AgentExec mock
			httpmock.RegisterResponder(http.MethodPost, fmt.Sprintf(`=~/nodes/%s/qemu/%d/agent/exec\z`, vm.Node, vm.VMID),
				newJSONResponder(200,
					map[string]interface{}{
						"pid": test.pid,
					},
				))

			// AgentExecStatus mock
			httpmock.RegisterResponder(http.MethodGet, fmt.Sprintf(`=~/nodes/%s/qemu/%d/agent/exec-status\?pid=%v`, vm.Node, vm.VMID, test.pid),
				newJSONResponder(200,
					&proxmox.AgentExecStatus{
						Exited:   test.exited,
						ExitCode: test.exitcode,
						OutData:  test.outData,
					},
				))

			running, err := client.CloudInitStatus(context.Background(), vm)
			// Match the sentinel through the error chain instead of comparing
			// with swapped expected/actual arguments.
			if test.err != nil {
				require.ErrorIs(t, err, test.err)
			} else {
				require.NoError(t, err)
			}
			require.Equal(t, test.running, running)
		})
	}
}
8 changes: 8 additions & 0 deletions pkg/proxmox/goproxmox/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package goproxmox

import "github.com/pkg/errors"

// ErrCloudInitFailed is the sentinel error reported when cloud-init finished
// with a failure inside the VM. Compare against it with errors.Is.
var ErrCloudInitFailed = errors.New("cloud-init failed execution")
54 changes: 54 additions & 0 deletions pkg/proxmox/proxmoxtest/mock_client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d8c0929

Please sign in to comment.