diff --git a/baremetal/metal3remediation_manager.go b/baremetal/metal3remediation_manager.go index d8a2cb4fc6..c62193b917 100644 --- a/baremetal/metal3remediation_manager.go +++ b/baremetal/metal3remediation_manager.go @@ -27,6 +27,7 @@ import ( infrav1 "github.com/metal3-io/cluster-api-provider-metal3/api/v1beta1" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/client-go/kubernetes/typed/core/v1" @@ -76,6 +77,16 @@ type RemediationManagerInterface interface { SetNodeBackupAnnotations(annotations string, labels string) bool GetNodeBackupAnnotations() (annotations, labels string) RemoveNodeBackupAnnotations() + AddOutOfServiceTaint(ctx context.Context, clusterClient v1.CoreV1Interface, node *corev1.Node) error + RemoveOutOfServiceTaint(ctx context.Context, clusterClient v1.CoreV1Interface, node *corev1.Node) error + HasOutOfServiceTaint(node *corev1.Node) bool + IsNodeDrained(ctx context.Context, clusterClient v1.CoreV1Interface, node *corev1.Node) bool +} + +var outOfServiceTaint = &corev1.Taint{ + Key: "node.kubernetes.io/out-of-service", + Value: "nodeshutdown", + Effect: corev1.TaintEffectNoExecute, } // RemediationManager is responsible for performing remediation reconciliation. @@ -476,3 +487,88 @@ func (r *RemediationManager) RemoveNodeBackupAnnotations() { func (r *RemediationManager) getPowerOffAnnotationKey() string { return fmt.Sprintf(powerOffAnnotation, r.Metal3Remediation.UID) } + +func (r *RemediationManager) HasOutOfServiceTaint(node *corev1.Node) bool { + for _, taint := range node.Spec.Taints { + if taint.MatchTaint(outOfServiceTaint) { + return true + } + } + return false +} + +func (r *RemediationManager) AddOutOfServiceTaint(ctx context.Context, clusterClient v1.CoreV1Interface, node *corev1.Node) error { + taint := outOfServiceTaint + now := metav1.Now() + taint.TimeAdded = &now + node.Spec.Taints = append(node.Spec.Taints, *taint) + if err := r.UpdateNode(ctx, clusterClient, node); err != nil { + msg := fmt.Sprintf("failed to add out-of-service taint on node %s", node.Name) + r.Log.Error(err, msg) + return errors.Wrap(err, msg) + } + r.Log.Info("Out-of-service taint added", "node", node.Name) + return nil +} + +func (r *RemediationManager) RemoveOutOfServiceTaint(ctx context.Context, clusterClient v1.CoreV1Interface, node *corev1.Node) error { + newTaints := []corev1.Taint{} + + var isPopOutOfServiceTaint bool + for _, taint := range node.Spec.Taints { + if taint.MatchTaint(outOfServiceTaint) { + isPopOutOfServiceTaint = true + continue + } + newTaints = append(newTaints, taint) + } + + if isPopOutOfServiceTaint { + r.Log.Info("Removing out-of-service taint from node taints", "node", node.Name) + node.Spec.Taints = newTaints + } else { + r.Log.Info("Out-of-service taint not found. 
Nothing to do", "node name", node.Name) + return nil + } + + if err := r.UpdateNode(ctx, clusterClient, node); err != nil { + msg := fmt.Sprintf("failed to remove out-of-service taint on node %s", node.Name) + r.Log.Error(err, msg) + return errors.Wrap(err, msg) + } + + r.Log.Info("Out-of-service taint removed", "node", node.Name) + return nil +} + +func (r *RemediationManager) IsNodeDrained(ctx context.Context, clusterClient v1.CoreV1Interface, node *corev1.Node) bool { + pods, err := clusterClient.Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + r.Log.Error(err, "failed to get pod list in the cluster") + return false + } + + for _, pod := range pods.Items { + if pod.Spec.NodeName == node.Name { + if pod.ObjectMeta.DeletionTimestamp != nil { + r.Log.Info("Waiting for terminating pod", "node", node.Name, "pod name", pod.Name, "phase", pod.Status.Phase) + return false + } + } + } + + volumeAttachments := &storagev1.VolumeAttachmentList{} + if err := r.Client.List(ctx, volumeAttachments); err != nil { + r.Log.Error(err, "failed to get volumeAttachments list") + return false + } + for _, va := range volumeAttachments.Items { + if va.Spec.NodeName == node.Name { + r.Log.Info("Waiting for deleting volumeAttachement", "node", node.Name, "name", va.Name) + return false + } + } + + r.Log.Info("Node is drained", "node", node.Name) + return true +} diff --git a/baremetal/mocks/zz_generated.metal3remediation_manager.go b/baremetal/mocks/zz_generated.metal3remediation_manager.go index 51145e86cf..6aa11f2ccb 100644 --- a/baremetal/mocks/zz_generated.metal3remediation_manager.go +++ b/baremetal/mocks/zz_generated.metal3remediation_manager.go @@ -60,6 +60,20 @@ func (m *MockRemediationManagerInterface) EXPECT() *MockRemediationManagerInterf return m.recorder } +// AddOutOfServiceTaint mocks base method. +func (m *MockRemediationManagerInterface) AddOutOfServiceTaint(ctx context.Context, clusterClient v11.CoreV1Interface, node *v1.Node) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddOutOfServiceTaint", ctx, clusterClient, node) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddOutOfServiceTaint indicates an expected call of AddOutOfServiceTaint. +func (mr *MockRemediationManagerInterfaceMockRecorder) AddOutOfServiceTaint(ctx, clusterClient, node interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddOutOfServiceTaint", reflect.TypeOf((*MockRemediationManagerInterface)(nil).AddOutOfServiceTaint), ctx, clusterClient, node) +} + // DeleteNode mocks base method. func (m *MockRemediationManagerInterface) DeleteNode(ctx context.Context, clusterClient v11.CoreV1Interface, node *v1.Node) error { m.ctrl.T.Helper() @@ -220,6 +234,20 @@ func (mr *MockRemediationManagerInterfaceMockRecorder) HasFinalizer() *gomock.Ca return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HasFinalizer", reflect.TypeOf((*MockRemediationManagerInterface)(nil).HasFinalizer)) } +// HasOutOfServiceTaint mocks base method. +func (m *MockRemediationManagerInterface) HasOutOfServiceTaint(node *v1.Node) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "HasOutOfServiceTaint", node) + ret0, _ := ret[0].(bool) + return ret0 +} + +// HasOutOfServiceTaint indicates an expected call of HasOutOfServiceTaint. 
+func (mr *MockRemediationManagerInterfaceMockRecorder) HasOutOfServiceTaint(node interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HasOutOfServiceTaint", reflect.TypeOf((*MockRemediationManagerInterface)(nil).HasOutOfServiceTaint), node) +} + // HasReachRetryLimit mocks base method. func (m *MockRemediationManagerInterface) HasReachRetryLimit() bool { m.ctrl.T.Helper() @@ -246,6 +274,20 @@ func (mr *MockRemediationManagerInterfaceMockRecorder) IncreaseRetryCount() *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IncreaseRetryCount", reflect.TypeOf((*MockRemediationManagerInterface)(nil).IncreaseRetryCount)) } +// IsNodeDrained mocks base method. +func (m *MockRemediationManagerInterface) IsNodeDrained(ctx context.Context, clusterClient v11.CoreV1Interface, node *v1.Node) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "IsNodeDrained", ctx, clusterClient, node) + ret0, _ := ret[0].(bool) + return ret0 +} + +// IsNodeDrained indicates an expected call of IsNodeDrained. +func (mr *MockRemediationManagerInterfaceMockRecorder) IsNodeDrained(ctx, clusterClient, node interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsNodeDrained", reflect.TypeOf((*MockRemediationManagerInterface)(nil).IsNodeDrained), ctx, clusterClient, node) +} + // IsPowerOffRequested mocks base method. func (m *MockRemediationManagerInterface) IsPowerOffRequested(ctx context.Context) (bool, error) { m.ctrl.T.Helper() @@ -302,6 +344,20 @@ func (mr *MockRemediationManagerInterfaceMockRecorder) RemoveNodeBackupAnnotatio return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveNodeBackupAnnotations", reflect.TypeOf((*MockRemediationManagerInterface)(nil).RemoveNodeBackupAnnotations)) } +// RemoveOutOfServiceTaint mocks base method. +func (m *MockRemediationManagerInterface) RemoveOutOfServiceTaint(ctx context.Context, clusterClient v11.CoreV1Interface, node *v1.Node) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemoveOutOfServiceTaint", ctx, clusterClient, node) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemoveOutOfServiceTaint indicates an expected call of RemoveOutOfServiceTaint. +func (mr *MockRemediationManagerInterfaceMockRecorder) RemoveOutOfServiceTaint(ctx, clusterClient, node interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveOutOfServiceTaint", reflect.TypeOf((*MockRemediationManagerInterface)(nil).RemoveOutOfServiceTaint), ctx, clusterClient, node) +} + // RemovePowerOffAnnotation mocks base method. 
func (m *MockRemediationManagerInterface) RemovePowerOffAnnotation(ctx context.Context) error { m.ctrl.T.Helper() diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 3d6463f6eb..2fe0ac36a6 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -118,6 +118,12 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - list - apiGroups: - infrastructure.cluster.x-k8s.io resources: @@ -336,3 +342,10 @@ rules: - get - patch - update +- apiGroups: + - storage.k8s.io + resources: + - volumeattachments + verbs: + - list + - watch diff --git a/controllers/metal3remediation_controller.go b/controllers/metal3remediation_controller.go index 7e5699bbc9..60a403502c 100644 --- a/controllers/metal3remediation_controller.go +++ b/controllers/metal3remediation_controller.go @@ -39,10 +39,13 @@ import ( // Metal3RemediationReconciler reconciles a Metal3Remediation object. type Metal3RemediationReconciler struct { client.Client - ManagerFactory baremetal.ManagerFactoryInterface - Log logr.Logger + ManagerFactory baremetal.ManagerFactoryInterface + Log logr.Logger + IsOutOfServiceTaintEnabled bool } +// +kubebuilder:rbac:groups=core,resources=pods,verbs=list +// +kubebuilder:rbac:groups=storage.k8s.io,resources=volumeattachments,verbs=list;watch // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metal3remediations,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metal3remediations/status,verbs=get;update;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;create;update;patch;delete @@ -206,15 +209,25 @@ func (r *Metal3RemediationReconciler) reconcileNormal(ctx context.Context, // Restore node if available and not done yet if remediationMgr.HasFinalizer() { if node != nil { - // Node was recreated, restore annotations and labels - r.Log.Info("Restoring the node") - if err := r.restoreNode(ctx, remediationMgr, clusterClient, node); err != nil { - return ctrl.Result{}, err + if r.IsOutOfServiceTaintEnabled { + if remediationMgr.HasOutOfServiceTaint(node) { + if err := remediationMgr.RemoveOutOfServiceTaint(ctx, clusterClient, node); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "error removing out-of-service taint from node %s", node.Name) + } + } + } else { + // Node was recreated, restore annotations and labels + r.Log.Info("Restoring the node") + if err := r.restoreNode(ctx, remediationMgr, clusterClient, node); err != nil { + return ctrl.Result{}, err + } } // clean up r.Log.Info("Remediation done, cleaning up remediation CR") - remediationMgr.RemoveNodeBackupAnnotations() + if !r.IsOutOfServiceTaintEnabled { + remediationMgr.RemoveNodeBackupAnnotations() + } remediationMgr.UnsetFinalizer() return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } else if isNodeForbidden { @@ -319,29 +332,43 @@ func (r *Metal3RemediationReconciler) remediateRebootStrategy(ctx context.Contex return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } - // if we have a node, store annotations and labels, and delete it if node != nil { - /* - Delete the node only after the host is powered off. Otherwise, if we would delete the node - when the host is powered on, the scheduler would assign the workload to other nodes, with the - possibility that two instances of the same application are running in parallel. This might result - in corruption or other issues for applications with singleton requirement. 
After the host is powered - off we know for sure that it is safe to re-assign that workload to other nodes. - */ - modified := r.backupNode(remediationMgr, node) - if modified { - r.Log.Info("Backing up node") - // save annotations before deleting node - return ctrl.Result{RequeueAfter: 1 * time.Second}, nil - } - r.Log.Info("Deleting node") - err := remediationMgr.DeleteNode(ctx, clusterClient, node) - if err != nil { - r.Log.Error(err, "error deleting node") - return ctrl.Result{}, errors.Wrap(err, "error deleting node") + if r.IsOutOfServiceTaintEnabled { + if !remediationMgr.HasOutOfServiceTaint(node) { + if err := remediationMgr.AddOutOfServiceTaint(ctx, clusterClient, node); err != nil { + return ctrl.Result{}, err + } + // If we immediately check if the node is drained, we might find no pods with + // Deletion timestamp set yet and assume the node is drained. + return ctrl.Result{RequeueAfter: 1 * time.Second}, nil + } + + if !remediationMgr.IsNodeDrained(ctx, clusterClient, node) { + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + } else { + /* + Delete the node only after the host is powered off. Otherwise, if we would delete the node + when the host is powered on, the scheduler would assign the workload to other nodes, with the + possibility that two instances of the same application are running in parallel. This might result + in corruption or other issues for applications with singleton requirement. After the host is powered + off we know for sure that it is safe to re-assign that workload to other nodes. + */ + modified := r.backupNode(remediationMgr, node) + if modified { + r.Log.Info("Backing up node") + // save annotations before deleting node + return ctrl.Result{RequeueAfter: 1 * time.Second}, nil + } + r.Log.Info("Deleting node") + err := remediationMgr.DeleteNode(ctx, clusterClient, node) + if err != nil { + r.Log.Error(err, "error deleting node") + return ctrl.Result{}, errors.Wrap(err, "error deleting node") + } + // wait until node is gone + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } - // wait until node is gone - return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } // we are done for this phase, switch to waiting for power on and the node restore diff --git a/controllers/metal3remediation_controller_test.go b/controllers/metal3remediation_controller_test.go index 903777adaa..b5289d5cfa 100644 --- a/controllers/metal3remediation_controller_test.go +++ b/controllers/metal3remediation_controller_test.go @@ -46,20 +46,23 @@ var ( ) type reconcileNormalRemediationTestCase struct { - ExpectError bool - ExpectRequeue bool - GetUnhealthyHostFails bool - GetRemediationTypeFails bool - HostStatusOffline bool - RemediationPhase string - IsFinalizerSet bool - IsPowerOffRequested bool - IsPoweredOn bool - IsNodeForbidden bool - IsNodeBackedUp bool - IsNodeDeleted bool - IsTimedOut bool - IsRetryLimitReached bool + ExpectError bool + ExpectRequeue bool + GetUnhealthyHostFails bool + GetRemediationTypeFails bool + HostStatusOffline bool + RemediationPhase string + IsFinalizerSet bool + IsPowerOffRequested bool + IsPoweredOn bool + IsNodeForbidden bool + IsNodeBackedUp bool + IsNodeDeleted bool + IsTimedOut bool + IsRetryLimitReached bool + IsOutOfServiceTaintSupported bool + IsOutOfServiceTaintAdded bool + IsNodeDrained bool } type reconcileRemediationTestCase struct { @@ -152,8 +155,19 @@ func setReconcileNormalRemediationExpectations(ctrl *gomock.Controller, if tc.IsPoweredOn { return m } + if tc.IsOutOfServiceTaintSupported { + if 
!tc.IsOutOfServiceTaintAdded { + m.EXPECT().HasOutOfServiceTaint(gomock.Any()).Return(false) + m.EXPECT().AddOutOfServiceTaint(context.TODO(), gomock.Any(), gomock.Any()).Return(nil) + return m + } - if !tc.IsNodeForbidden && !tc.IsNodeDeleted { + m.EXPECT().HasOutOfServiceTaint(gomock.Any()).Return(true) + m.EXPECT().IsNodeDrained(context.TODO(), gomock.Any(), gomock.Any()).Return(tc.IsNodeDrained) + if !tc.IsNodeDrained { + return m + } + } else if !tc.IsNodeForbidden && !tc.IsNodeDeleted { m.EXPECT().SetNodeBackupAnnotations("{\"foo\":\"bar\"}", "{\"answer\":\"42\"}").Return(!tc.IsNodeBackedUp) if !tc.IsNodeBackedUp { return m @@ -181,16 +195,25 @@ func setReconcileNormalRemediationExpectations(ctrl *gomock.Controller, m.EXPECT().HasFinalizer().Return(tc.IsFinalizerSet) if tc.IsFinalizerSet { - if !tc.IsNodeDeleted { - m.EXPECT().GetNodeBackupAnnotations().Return("{\"foo\":\"bar\"}", "{\"answer\":\"42\"}") - m.EXPECT().UpdateNode(context.TODO(), gomock.Any(), gomock.Any()) - m.EXPECT().RemoveNodeBackupAnnotations() - m.EXPECT().UnsetFinalizer() - return m - } - if tc.IsNodeForbidden { - m.EXPECT().UnsetFinalizer() - return m + if tc.IsOutOfServiceTaintSupported { + if tc.IsOutOfServiceTaintAdded { + m.EXPECT().HasOutOfServiceTaint(gomock.Any()).Return(true) + m.EXPECT().RemoveOutOfServiceTaint(context.TODO(), gomock.Any(), gomock.Any()).Return(nil) + m.EXPECT().UnsetFinalizer() + return m + } + } else { + if !tc.IsNodeDeleted { + m.EXPECT().GetNodeBackupAnnotations().Return("{\"foo\":\"bar\"}", "{\"answer\":\"42\"}") + m.EXPECT().UpdateNode(context.TODO(), gomock.Any(), gomock.Any()) + m.EXPECT().RemoveNodeBackupAnnotations() + m.EXPECT().UnsetFinalizer() + return m + } + if tc.IsNodeForbidden { + m.EXPECT().UnsetFinalizer() + return m + } } } @@ -330,9 +353,10 @@ var _ = Describe("Metal3Remediation controller", func() { DescribeTable("ReconcileNormal tests", func(tc reconcileNormalRemediationTestCase) { fakeClient := fake.NewClientBuilder().WithScheme(setupScheme()).Build() testReconciler = &Metal3RemediationReconciler{ - Client: fakeClient, - ManagerFactory: baremetal.NewManagerFactory(fakeClient), - Log: logr.Discard(), + Client: fakeClient, + ManagerFactory: baremetal.NewManagerFactory(fakeClient), + Log: logr.Discard(), + IsOutOfServiceTaintEnabled: tc.IsOutOfServiceTaintSupported, } m := setReconcileNormalRemediationExpectations(goMockCtrl, tc) res, err := testReconciler.reconcileNormal(context.TODO(), m) @@ -418,6 +442,49 @@ var _ = Describe("Metal3Remediation controller", func() { IsNodeDeleted: false, IsTimedOut: false, }), + Entry("[OOST] Should set out-of-service taint, and then requeue", reconcileNormalRemediationTestCase{ + ExpectError: false, + ExpectRequeue: true, + RemediationPhase: infrav1.PhaseRunning, + IsFinalizerSet: true, + IsPowerOffRequested: true, + IsPoweredOn: false, + IsOutOfServiceTaintSupported: true, + IsOutOfServiceTaintAdded: false, + }), + Entry("[OOST] Should requeue if Node is not drained yet", reconcileNormalRemediationTestCase{ + ExpectError: false, + ExpectRequeue: true, + RemediationPhase: infrav1.PhaseRunning, + IsFinalizerSet: true, + IsPowerOffRequested: true, + IsPoweredOn: false, + IsOutOfServiceTaintSupported: true, + IsOutOfServiceTaintAdded: true, + IsNodeDrained: false, + }), + Entry("[OOST] Should update phase when node is drained", reconcileNormalRemediationTestCase{ + ExpectError: false, + ExpectRequeue: true, + RemediationPhase: infrav1.PhaseRunning, + IsFinalizerSet: true, + IsPowerOffRequested: true, + IsPoweredOn: false, + 
IsOutOfServiceTaintSupported: true, + IsOutOfServiceTaintAdded: true, + IsNodeDrained: true, + }), + Entry("[OOST] Should remove out-of-service taint and requeue", reconcileNormalRemediationTestCase{ + ExpectError: false, + ExpectRequeue: true, + RemediationPhase: infrav1.PhaseWaiting, + IsFinalizerSet: true, + IsPowerOffRequested: false, + IsPoweredOn: true, + IsOutOfServiceTaintSupported: true, + IsOutOfServiceTaintAdded: true, + IsNodeDrained: true, + }), Entry("Should delete node when backed up, and then requeue", reconcileNormalRemediationTestCase{ ExpectError: false, ExpectRequeue: true, diff --git a/main.go b/main.go index 0068d62078..c2f377a653 100644 --- a/main.go +++ b/main.go @@ -23,6 +23,7 @@ import ( "fmt" "math/rand" "os" + "strconv" "strings" "time" @@ -39,6 +40,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/selection" "k8s.io/client-go/discovery" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" "k8s.io/client-go/rest" @@ -62,6 +64,9 @@ import ( const ( TLSVersion12 = "TLS12" TLSVersion13 = "TLS13" + // out-of-service taint strategy (GA from 1.28). + minK8sMajorVersionOutOfServiceTaint = 1 + minK8sMinorVersionGAOutOfServiceTaint = 28 ) type TLSOptions struct { @@ -443,10 +448,15 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { os.Exit(1) } + isOOSTSupported, err := isOutOfServiceTaintSupported(mgr.GetConfig()) + if err != nil { + setupLog.Error(err, "unable to detect support for Out-of-service taint") + } if err := (&controllers.Metal3RemediationReconciler{ - Client: mgr.GetClient(), - ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), - Log: ctrl.Log.WithName("controllers").WithName("Metal3Remediation"), + Client: mgr.GetClient(), + ManagerFactory: baremetal.NewManagerFactory(mgr.GetClient()), + Log: ctrl.Log.WithName("controllers").WithName("Metal3Remediation"), + IsOutOfServiceTaintEnabled: isOOSTSupported, }).SetupWithManager(ctx, mgr, concurrency(metal3RemediationConcurrency)); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Metal3Remediation") os.Exit(1) @@ -565,3 +575,41 @@ func GetTLSOptionOverrideFuncs(options TLSOptions) ([]func(*tls.Config), error) return tlsOptions, nil } + +func isOutOfServiceTaintSupported(config *rest.Config) (bool, error) { + cs, err := kubernetes.NewForConfig(config) + if err != nil || cs == nil { + if cs == nil { + err = fmt.Errorf("k8s client set is nil") + } + setupLog.Error(err, "unable to get k8s client") + return false, err + } + + k8sVersion, err := cs.Discovery().ServerVersion() + if err != nil || k8sVersion == nil { + if k8sVersion == nil { + err = fmt.Errorf("k8s server version is nil") + } + setupLog.Error(err, "unable to get k8s server version") + return false, err + } + + major, err := strconv.Atoi(k8sVersion.Major) + if err != nil { + setupLog.Error(err, "could not parse k8s server major version", "major version", k8sVersion.Major) + return false, err + } + minor, err := strconv.Atoi(k8sVersion.Minor) + if err != nil { + setupLog.Error(err, "could not convert k8s server minor version", "minor version", k8sVersion.Minor) + return false, err + } + + isSupported := major > minK8sMajorVersionOutOfServiceTaint || + (major == minK8sMajorVersionOutOfServiceTaint && + minor >= minK8sMinorVersionGAOutOfServiceTaint) + + setupLog.Info("out-of-service taint", "supported", isSupported) + return isSupported, nil +} diff --git a/test/e2e/common.go b/test/e2e/common.go index 
6553d598c6..ed9514ad4c 100644 --- a/test/e2e/common.go +++ b/test/e2e/common.go @@ -58,6 +58,9 @@ const ( osTypeCentos = "centos" osTypeUbuntu = "ubuntu" ironicSuffix = "-ironic" + // Out-of-service Taint test actions. + oostAdded = "added" + oostRemoved = "removed" ) var releaseMarkerPrefix = "go://github.com/metal3-io/cluster-api-provider-metal3@v%s" diff --git a/test/e2e/node_deletion_remediation.go b/test/e2e/node_deletion_remediation.go index 3f0ba19f8f..091654f148 100644 --- a/test/e2e/node_deletion_remediation.go +++ b/test/e2e/node_deletion_remediation.go @@ -8,6 +8,7 @@ import ( infrav1 "github.com/metal3-io/cluster-api-provider-metal3/api/v1beta1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "golang.org/x/mod/semver" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -17,7 +18,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -type NodeDeletionRemediation struct { +const minK8sVersionOutOfServiceTaint = "1.28" + +type NodeRemediation struct { E2EConfig *clusterctl.E2EConfig BootstrapClusterProxy framework.ClusterProxy TargetCluster framework.ClusterProxy @@ -27,22 +30,30 @@ type NodeDeletionRemediation struct { } /* - * Node Deletion Remediation Test + * Node Remediation Test * - * This test evaluates node deletion in reboot remediation feature added to CAPM3 Remediation Controller. + * This test evaluates node remediation (via deletion or use of out-of-service taint) in reboot remediation feature added to CAPM3 Remediation Controller. * issue #392: Reboot remediation is incomplete * PR #668: Fix reboot remediation by adding node deletion - * This test evaluates the reboot remediation strategy with an enhancement of node deletion in the CAPM3 (Cluster API Provider for Metal3) Remediation Controller. + * This test evaluates the reboot remediation strategy with an enhancement in the CAPM3 (Cluster API Provider for Metal3) Remediation Controller + * consisting of: + * - node deletion (kubernetes server version < 1.28) + * - out-of-service taint on node (kubernetes server version >= 1.28) * * Tested Feature: - * - Delete Node in Reboot Remediation + * - Manage Node in Reboot Remediation (deletion or out-of-service taint) * * Workflow: * 1. Retrieve the Metal3Machines associated with the worker nodes in the cluster. * 2. Identify the target worker machine node its associated BMH object corresponding to the Metal3Machine. * 3. Create a Metal3Remediation resource, specifying the remediation strategy as "Reboot" with a retry limit and timeout. * 4. Wait for the VM (Virtual Machine) associated with target BMH to power off. - * 5. Wait for the target worker node to be deleted from the cluster. + * 5. Identify the kubernetes server version: + * 5a. if version < 1.28: + * - Wait for the target worker node to be deleted from the cluster. + * 5b. if version >= 1.28: + * - Wait for the out-of-service taint to be set on target worker node. + * - Wait for the out-of-service taint to be removed from target worker node. * 6. Wait for the VMs to power on. * 7. Verify that the target worker node becomes ready. * 8. 
Verify that the Metal3Remediation resource is successfully delete @@ -52,8 +63,8 @@ type NodeDeletionRemediation struct { * resiliency of the cluster by allowing workloads to be seamlessly migrated from unhealthy nodes to healthy node */ -func nodeDeletionRemediation(ctx context.Context, inputGetter func() NodeDeletionRemediation) { - Logf("Starting node deletion remediation tests") +func nodeRemediation(ctx context.Context, inputGetter func() NodeRemediation) { + Logf("Starting node remediation tests") input := inputGetter() bootstrapClient := input.BootstrapClusterProxy.GetClient() targetClient := input.TargetCluster.GetClient() @@ -106,9 +117,18 @@ func nodeDeletionRemediation(ctx context.Context, inputGetter func() NodeDeletio By("Waiting for VM power off") waitForVmsState([]string{vmName}, shutoff, input.SpecName, input.E2EConfig.GetIntervals(input.SpecName, "wait-vm-state")...) - By("Waiting for node deletion") - interval := input.E2EConfig.GetIntervals(input.SpecName, "wait-vm-state") - waitForNodeDeletion(ctx, targetClient, workerNodeName, interval...) + k8sVersion := input.E2EConfig.GetVariable("KUBERNETES_VERSION") + if isOutOfServiceTaintSupported(k8sVersion) { + Byf("Waiting for Out of service taint on node to be added (kubernetes version %s)", k8sVersion) + interval := input.E2EConfig.GetIntervals(input.SpecName, "wait-vm-state") + waitForOutOfServiceTaint(ctx, targetClient, workerNodeName, oostAdded, interval...) + Byf("Waiting for Out of service taint on node to be removed (kubernetes version %s)", k8sVersion) + waitForOutOfServiceTaint(ctx, targetClient, workerNodeName, oostRemoved, interval...) + } else { + By("Waiting for node deletion") + interval := input.E2EConfig.GetIntervals(input.SpecName, "wait-vm-state") + waitForNodeDeletion(ctx, targetClient, workerNodeName, interval...) + } By("Waiting for VM power on") waitForVmsState([]string{vmName}, running, input.SpecName, input.E2EConfig.GetIntervals(input.SpecName, "wait-vm-state")...) 
@@ -125,7 +145,7 @@ func nodeDeletionRemediation(ctx context.Context, inputGetter func() NodeDeletio return apierrors.IsNotFound(err) }, 2*time.Minute, 10*time.Second).Should(BeTrue(), "Metal3Remediation should have been deleted") - By("NODE DELETION TESTS PASSED!") + By("NODE REMEDIATION TESTS PASSED!") } func waitForNodeDeletion(ctx context.Context, cl client.Client, name string, intervals ...interface{}) { @@ -137,3 +157,30 @@ func waitForNodeDeletion(ctx context.Context, cl client.Client, name string, int return apierrors.IsNotFound(err) }, intervals...).Should(BeTrue()) } + +func waitForOutOfServiceTaint(ctx context.Context, cl client.Client, name, action string, intervals ...interface{}) { + Byf("Waiting for Out of service taint on Node '%s' to be %s", name, action) + var oostExpectedToExist bool + if action == oostAdded { + oostExpectedToExist = true + } + Eventually( + func() bool { + node := &corev1.Node{} + err := cl.Get(ctx, client.ObjectKey{Name: name}, node) + Expect(err).ToNot(HaveOccurred()) + for _, t := range node.Spec.Taints { + if t.Key == "node.kubernetes.io/out-of-service" && + t.Value == "nodeshutdown" && + t.Effect == corev1.TaintEffectNoExecute { + return oostExpectedToExist + } + } + return !oostExpectedToExist + }, intervals...).Should(BeTrue()) +} + +func isOutOfServiceTaintSupported(k8sVersion string) bool { + Byf("comparing current version %s with minimum supported version %s", k8sVersion, minK8sVersionOutOfServiceTaint) + return semver.Compare(k8sVersion, minK8sVersionOutOfServiceTaint) >= 0 +} diff --git a/test/e2e/remediation_based_feature_test.go b/test/e2e/remediation_based_feature_test.go index 774bbf95a4..c53c511361 100644 --- a/test/e2e/remediation_based_feature_test.go +++ b/test/e2e/remediation_based_feature_test.go @@ -17,15 +17,19 @@ import ( * These tests involve simulating failure scenarios, triggering the remediation process, and then verifying that the remediation actions successfully restore the nodes to the desired state. * * Test Types: - * 1. Metal3Remediation Test: This test specifically evaluates the Metal3 Remediation Controller's node deletion feature in the reboot remediation strategy. + * 1. Metal3Remediation Test: This test specifically evaluates the Metal3 Remediation Controller's node management feature in the reboot remediation strategy. * 2. Remediation Test: This test focuses on verifying various annotations and actions related to remediation in the CAPM3 (Cluster API Provider for Metal3). * - * NodeDeletionRemediation Test: + * NodeRemediation Test: * - Retrieve the list of Metal3 machines associated with the worker nodes. * - Identify the target worker Metal3Machine and its corresponding BareMetalHost (BMH) object. * - Create a Metal3Remediation resource with a remediation strategy of type "Reboot" and a specified timeout. * - Wait for the associated virtual machine (VM) to power off. - * - Wait for the node (VM) to be deleted. + * - If kubernetes server version < 1.28: + * - Wait for the node (VM) to be deleted. + * - If kubernetes server version >= 1.28: + * - Wait for the out-of-service taint to be set on the node. + * - Wait for the out-of-service taint to be removed from the node. * - Wait for the VM to power on. * - Wait for the node to be in a ready state. * - Delete the Metal3Remediation resource. 
@@ -71,9 +75,9 @@ var _ = Describe("Testing nodes remediation [remediation] [features]", Label("re targetCluster, _ = createTargetCluster(e2eConfig.GetVariable("KUBERNETES_VERSION")) // Run Metal3Remediation test first, doesn't work after remediation... - By("Running node deletion remediation tests") - nodeDeletionRemediation(ctx, func() NodeDeletionRemediation { - return NodeDeletionRemediation{ + By("Running node remediation tests") + nodeRemediation(ctx, func() NodeRemediation { + return NodeRemediation{ E2EConfig: e2eConfig, BootstrapClusterProxy: bootstrapClusterProxy, TargetCluster: targetCluster, diff --git a/test/go.mod b/test/go.mod index d1ee12cf12..940939b954 100644 --- a/test/go.mod +++ b/test/go.mod @@ -13,6 +13,7 @@ require ( github.com/onsi/gomega v1.33.1 github.com/pkg/errors v0.9.1 golang.org/x/crypto v0.24.0 + golang.org/x/mod v0.18.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.30.3 k8s.io/apiextensions-apiserver v0.30.3 @@ -129,7 +130,6 @@ require ( go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect - golang.org/x/mod v0.17.0 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect diff --git a/test/go.sum b/test/go.sum index bc81e01396..f5b8016ec5 100644 --- a/test/go.sum +++ b/test/go.sum @@ -348,8 +348,8 @@ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= -golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=