Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix stagedUpdateRun PolicyObservedClusterCount #1014

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pkg/controllers/updaterun/execution_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,8 @@ var _ = Describe("UpdateRun execution tests", func() {
waitEndTime := meta.FindStatusCondition(updateRun.Status.StagesStatus[0].AfterStageTaskStatus[0].Conditions, string(placementv1beta1.AfterStageTaskConditionWaitTimeElapsed)).LastTransitionTime.Time
// In this test, I set wait time to be 4 seconds, while stageClusterUpdatingWaitTime is 3 seconds.
// So it needs 2 rounds of reconcile to wait for the waitTime to elapse, waitEndTime - waitStartTime should be around 6 seconds.
Expect(waitStartTime.Add(updateStrategy.Spec.Stages[0].AfterStageTasks[0].WaitTime.Duration).Before(waitEndTime)).Should(BeTrue())
Expect(waitStartTime.Add(updateStrategy.Spec.Stages[0].AfterStageTasks[0].WaitTime.Duration).Before(waitEndTime)).Should(BeTrue(),
fmt.Sprintf("waitEndTime %v did not pass waitStartTime %v long enough, want at least %v", waitEndTime, waitStartTime, updateStrategy.Spec.Stages[0].AfterStageTasks[0].WaitTime.Duration))
})

It("Should mark the 1st cluster in the 2nd stage as succeeded after marking the binding available", func() {
Expand Down
10 changes: 10 additions & 0 deletions pkg/controllers/updaterun/initialization.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,16 @@ func (r *Reconciler) collectScheduledClusters(
// no more retries here.
return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, nobindingErr.Error())
}

if updateRun.Status.PolicyObservedClusterCount == -1 {
// For pickAll policy, the observed cluster count is not included in the policy snapshot. We set it to the number of selected bindings.
updateRun.Status.PolicyObservedClusterCount = len(selectedBindings)
} else if updateRun.Status.PolicyObservedClusterCount != len(selectedBindings) {
countErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the number of selected bindings %d is not equal to the observed cluster count %d", len(selectedBindings), updateRun.Status.PolicyObservedClusterCount))
klog.ErrorS(countErr, "Failed to collect clusterResourceBindings", "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef)
// no more retries here.
return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, countErr.Error())
}
return selectedBindings, toBeDeletedBindings, nil
}

Expand Down
48 changes: 48 additions & 0 deletions pkg/controllers/updaterun/initialization_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,17 @@ var _ = Describe("Updaterun initialization tests", func() {
})

It("Should not report error if there are only to-be-deleted clusters", func() {
By("Updating the policy snapshot to use pickAll policy to avoid binding count check")
policySnapshot.Spec.Policy.PlacementType = placementv1beta1.PickAllPlacementType
Expect(k8sClient.Update(ctx, policySnapshot)).To(Succeed())
meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{
Type: string(placementv1beta1.PolicySnapshotScheduled),
Status: metav1.ConditionTrue,
ObservedGeneration: policySnapshot.Generation,
Reason: "scheduled",
})
Expect(k8sClient.Status().Update(ctx, policySnapshot)).Should(Succeed())

By("Creating a to-be-deleted clusterResourceBinding")
binding := generateTestClusterResourceBinding(policySnapshot.Name+"a", "cluster-0", placementv1beta1.BindingStateUnscheduled)
Expect(k8sClient.Create(ctx, binding)).To(Succeed())
Expand All @@ -391,6 +402,43 @@ var _ = Describe("Updaterun initialization tests", func() {
validateFailedInitCondition(ctx, updateRun, "referenced clusterStagedUpdateStrategy not found")
})

It("Should fail to initialize if the number of selected bindings does not match the observed cluster count", func() {
By("Creating only one scheduled cluterResourceBinding")
Expect(k8sClient.Create(ctx, resourceBindings[0])).To(Succeed())

By("Creating a new clusterStagedUpdateRun")
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())

By("Validating the initialization failed")
validateFailedInitCondition(ctx, updateRun, "the number of selected bindings 1 is not equal to the observed cluster count 10")
})

It("Should update the ObservedClusterCount to the number of scheduled bindings if it's pickAll policy", func() {
By("Updating the policy snapshot to use pickAll policy")
policySnapshot.Spec.Policy.PlacementType = placementv1beta1.PickAllPlacementType
Expect(k8sClient.Update(ctx, policySnapshot)).To(Succeed())
meta.SetStatusCondition(&policySnapshot.Status.Conditions, metav1.Condition{
Type: string(placementv1beta1.PolicySnapshotScheduled),
Status: metav1.ConditionTrue,
ObservedGeneration: policySnapshot.Generation,
Reason: "scheduled",
})
Expect(k8sClient.Status().Update(ctx, policySnapshot)).Should(Succeed())

By("Creating only one scheduled cluterResourceBinding")
Expect(k8sClient.Create(ctx, resourceBindings[0])).To(Succeed())

By("Creating a new clusterStagedUpdateRun")
Expect(k8sClient.Create(ctx, updateRun)).To(Succeed())

By("Validating the initialization not failed due to no selected cluster")
// it should fail due to strategy not found
validateFailedInitCondition(ctx, updateRun, "referenced clusterStagedUpdateStrategy not found")

By("Validating the ObservedClusterCount is updated")
Expect(updateRun.Status.PolicyObservedClusterCount).To(Equal(1), "failed to update the updateRun PolicyObservedClusterCount status")
})

It("Should fail to initialize if the bindings with latest policy snapshots are not in Scheduled or Bound state", func() {
By("Creating a not scheduled clusterResourceBinding")
binding := generateTestClusterResourceBinding(policySnapshot.Name, "cluster-1", placementv1beta1.BindingStateUnscheduled)
Expand Down
7 changes: 4 additions & 3 deletions pkg/controllers/updaterun/validation_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,13 @@ var _ = Describe("UpdateRun validation tests", func() {
})

It("Should fail to validate if the number of clusters has changed in a stage", func() {
By("Deleting a cluster resource binding")
Expect(k8sClient.Delete(ctx, resourceBindings[0])).Should(Succeed())
By("Changing 1st cluster's so that it's selected by the 1st stage")
targetClusters[0].Labels["region"] = regionEastus
Expect(k8sClient.Update(ctx, targetClusters[0])).Should(Succeed())

By("Validating the validation failed")
wantStatus = generateFailedValidationStatus(updateRun, wantStatus)
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "the number of clusters in index `1` stage has changed")
validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "the number of clusters in index `0` stage has changed")
})

It("Should fail to validate if the cluster name has changed in a stage", func() {
Expand Down
Loading