diff --git a/common/api/v1alpha1/instance.go b/common/api/v1alpha1/instance.go
index ace394b..4c5da95 100644
--- a/common/api/v1alpha1/instance.go
+++ b/common/api/v1alpha1/instance.go
@@ -149,6 +149,10 @@ type InstanceSpec struct {
 	// AdminUser represents the admin user specification
 	// +optional
 	AdminUser *AdminUserSpec `json:"adminUser,omitempty"`
+
+	// IsStopped is true if an instance is stopped, false otherwise
+	// +optional
+	IsStopped *bool `json:"isStopped,omitempty"`
 }
 
 // DBLoadBalancerOptions contains customization options for the Kubernetes
diff --git a/common/api/v1alpha1/zz_generated.deepcopy.go b/common/api/v1alpha1/zz_generated.deepcopy.go
index 975c587..1573937 100644
--- a/common/api/v1alpha1/zz_generated.deepcopy.go
+++ b/common/api/v1alpha1/zz_generated.deepcopy.go
@@ -540,6 +540,11 @@ func (in *InstanceSpec) DeepCopyInto(out *InstanceSpec) {
 		*out = new(AdminUserSpec)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.IsStopped != nil {
+		in, out := &in.IsStopped, &out.IsStopped
+		*out = new(bool)
+		**out = **in
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceSpec.
diff --git a/oracle/config/crd/bases/oracle.db.anthosapis.com_instances.yaml b/oracle/config/crd/bases/oracle.db.anthosapis.com_instances.yaml
index 1f93502..7adeace 100644
--- a/oracle/config/crd/bases/oracle.db.anthosapis.com_instances.yaml
+++ b/oracle/config/crd/bases/oracle.db.anthosapis.com_instances.yaml
@@ -321,6 +321,9 @@ spec:
               an optional map that allows a customer to specify GCR images different
               from those chosen/provided.
             type: object
+          isStopped:
+            description: IsStopped is true if an instance is stopped, false otherwise
+            type: boolean
           maintenanceWindow:
             description: MaintenanceWindow specifies the time windows during which
               database downtimes are allowed for maintenance.
diff --git a/oracle/config/rbac/role.yaml b/oracle/config/rbac/role.yaml
index 50bcfdc..9afeeba 100644
--- a/oracle/config/rbac/role.yaml
+++ b/oracle/config/rbac/role.yaml
@@ -144,6 +144,7 @@ rules:
   - services
   verbs:
   - create
+  - delete
   - get
   - list
   - patch
diff --git a/oracle/controllers/instancecontroller/BUILD.bazel b/oracle/controllers/instancecontroller/BUILD.bazel
index 0eba45c..f59fdc4 100644
--- a/oracle/controllers/instancecontroller/BUILD.bazel
+++ b/oracle/controllers/instancecontroller/BUILD.bazel
@@ -49,6 +49,7 @@ go_library(
         "@io_k8s_sigs_controller_runtime//pkg/predicate",
        "@io_k8s_sigs_controller_runtime//pkg/reconcile",
         "@io_k8s_sigs_controller_runtime//pkg/source",
+        "@io_k8s_utils//pointer",
         "@org_golang_google_protobuf//types/known/timestamppb",
     ],
 )
diff --git a/oracle/controllers/instancecontroller/instance_controller.go b/oracle/controllers/instancecontroller/instance_controller.go
index fedd4d1..c07b760 100644
--- a/oracle/controllers/instancecontroller/instance_controller.go
+++ b/oracle/controllers/instancecontroller/instance_controller.go
@@ -76,7 +76,7 @@ func (r *InstanceReconciler) Scheme() *runtime.Scheme {
 // +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch
 // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups="",resources=persistentvolumes,verbs=get;list;watch;create;update;patch;delete
-// +kubebuilder:rbac:groups=core,resources=services,verbs=list;watch;get;patch;create
+// +kubebuilder:rbac:groups=core,resources=services,verbs=list;watch;get;patch;create;delete
 // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
 // +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch
@@ -125,8 +125,14 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
 		}
 	}()
 
-	if !inst.DeletionTimestamp.IsZero() {
+	instanceReadyCond := k8s.FindCondition(inst.Status.Conditions, k8s.Ready)
+
+	if IsDeleting(&inst) {
 		return r.reconcileInstanceDeletion(ctx, req, log)
+	} else if IsStopped(&inst) && !k8s.ConditionReasonEquals(instanceReadyCond, k8s.InstanceStopped) {
+		return r.reconcileInstanceStop(ctx, req, log)
+	} else if !IsStopped(&inst) && k8s.ConditionReasonEquals(instanceReadyCond, k8s.InstanceStopped) {
+		k8s.InstanceUpsertCondition(&inst.Status, k8s.Ready, v1.ConditionFalse, k8s.CreateInProgress, "Restarting Instance")
 	}
 
 	// Add finalizer to clean up underlying objects in case of deletion.
@@ -144,7 +150,7 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
 	}
 	log.Info("common instance", "total allocated disk space across all instance disks [Gi]", diskSpace/1024/1024/1024)
 
-	instanceReadyCond := k8s.FindCondition(inst.Status.Conditions, k8s.Ready)
+	instanceReadyCond = k8s.FindCondition(inst.Status.Conditions, k8s.Ready)
 	dbInstanceCond := k8s.FindCondition(inst.Status.Conditions, k8s.DatabaseInstanceReady)
 
 	if inst.Spec.Mode == commonv1alpha1.Pause {
@@ -440,6 +446,64 @@ func (r *InstanceReconciler) reconcileInstanceDeletion(ctx context.Context, req
 	return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
 }
 
+func (r *InstanceReconciler) reconcileInstanceStop(ctx context.Context, req ctrl.Request, log logr.Logger) (ctrl.Result, error) {
+	log.Info("Stopping Instance...", "InstanceName", req.NamespacedName.Name)
+
+	var inst v1alpha1.Instance
+	if err := r.Get(ctx, req.NamespacedName, &inst); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+	r.recordEventAndUpdateStatus(ctx, &inst, v1.ConditionFalse, k8s.InstanceStoppingInProgress, "Instance is being stopped", log)
+	if _, err := r.stopDBStatefulset(ctx, req, log); err != nil {
+		return ctrl.Result{}, err
+	}
+	// Delete the monitoring deployment.
+	var monitor appsv1.Deployment
+	if err := r.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: fmt.Sprintf("%s-monitor", req.Name)}, &monitor); err == nil {
+		if err := r.Delete(ctx, &monitor); err != nil {
+			log.Error(err, "failed to delete monitoring deployment", "InstanceName", req.Name, "MonitorDeployment", monitor.Name)
+			return ctrl.Result{}, err
+		}
+	} else if !apierrors.IsNotFound(err) { // Retry on any other error.
+		return ctrl.Result{}, err
+	}
+	if _, err := r.deleteDBLoadBalancer(ctx, &inst, log); err != nil {
+		return ctrl.Result{}, err
+	}
+	if _, err := r.deleteDBDSVC(ctx, &inst, log); err != nil {
+		return ctrl.Result{}, err
+	}
+	if _, err := r.deleteAgentSVC(ctx, &inst, log); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	if err := r.Get(ctx, req.NamespacedName, &inst); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+	r.recordEventAndUpdateStatus(ctx, &inst, v1.ConditionTrue, k8s.InstanceStopped, "Instance has been stopped", log)
+
+	return ctrl.Result{Requeue: false}, nil
+
+}
+
+func (r *InstanceReconciler) shutdownDB(ctx context.Context, inst v1alpha1.Instance) error {
+	dbClient, closeConn, err := r.DatabaseClientFactory.New(ctx, r, inst.GetNamespace(), inst.Name)
+	if err != nil {
+		return err
+	}
+	defer closeConn()
+
+	_, err = dbClient.BounceDatabase(ctx, &dbdpb.BounceDatabaseRequest{
+		Operation:    dbdpb.BounceDatabaseRequest_SHUTDOWN,
+		DatabaseName: inst.Spec.CDBName,
+		Option:       "immediate",
+	})
+	if err != nil {
+		return fmt.Errorf("BounceDatabase: error while shutting down the database: %v", err)
+	}
+	return nil
+}
+
 // reconcileDatabaseInstance reconciling the underlying database instance.
 // Successful state transition for seeded instance:
 // nil->BootstrapPending->BootstrapInProgress->CreateFailed/CreateComplete
@@ -704,3 +768,10 @@ func lroOperationID(opType string, instance *v1alpha1.Instance) string {
 		return fmt.Sprintf("%s_%s", opType, instance.GetUID())
 	}
 }
+func IsStopped(instance *v1alpha1.Instance) bool {
+	return instance.InstanceSpec().IsStopped != nil && *instance.InstanceSpec().IsStopped
+}
+
+func IsDeleting(instance *v1alpha1.Instance) bool {
+	return !instance.GetDeletionTimestamp().IsZero()
+}
diff --git a/oracle/controllers/instancecontroller/utils.go b/oracle/controllers/instancecontroller/utils.go
index 7b34653..aa926e1 100644
--- a/oracle/controllers/instancecontroller/utils.go
+++ b/oracle/controllers/instancecontroller/utils.go
@@ -36,6 +36,7 @@ import (
 	storagev1 "k8s.io/api/storage/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/utils/pointer"
 
 	"github.com/go-logr/logr"
 	appsv1 "k8s.io/api/apps/v1"
@@ -219,6 +220,59 @@ func (r *InstanceReconciler) reconcileMonitoring(ctx context.Context, inst *v1al
 	return ctrl.Result{RequeueAfter: requeueDuration}, nil
 }
 
+func (r *InstanceReconciler) stopDBStatefulset(ctx context.Context, req ctrl.Request, log logr.Logger) (ctrl.Result, error) {
+	var inst v1alpha1.Instance
+	if err := r.Get(ctx, req.NamespacedName, &inst); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+	sts := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      getSTSName(inst),
+			Namespace: inst.Namespace,
+		},
+	}
+	if err := r.Get(ctx, client.ObjectKeyFromObject(sts), sts); err != nil {
+		return ctrl.Result{}, fmt.Errorf("failed to get statefulset: %v", err)
+	}
+
+	sts.Spec.Replicas = pointer.Int32(controllers.StoppedReplicaCnt)
+	baseSTS := &appsv1.StatefulSet{}
+	sts.DeepCopyInto(baseSTS)
+	if _, err := ctrl.CreateOrUpdate(ctx, r, baseSTS, func() error {
+		sts.Spec.DeepCopyInto(&baseSTS.Spec)
+		return nil
+	}); err != nil {
+		log.Error(err, "failed to update the StatefulSet", "sts.Status", sts.Status)
+		return ctrl.Result{}, err
+	}
+	return ctrl.Result{}, nil
+
+}
+
+func (r *InstanceReconciler) deleteAgentSVC(ctx context.Context, inst *v1alpha1.Instance, log logr.Logger) (ctrl.Result, error) {
+	if err := r.Delete(ctx, AgentSVC(*inst)); err != nil && !apierrors.IsNotFound(err) {
+		log.Error(err, "failed to delete agent svc", "InstanceName", inst.Name)
+		return ctrl.Result{}, err
+	}
+	return ctrl.Result{}, nil
+}
+
+func (r *InstanceReconciler) deleteDBDSVC(ctx context.Context, inst *v1alpha1.Instance, log logr.Logger) (ctrl.Result, error) {
+	if err := r.Delete(ctx, DbDaemonSVC(*inst)); err != nil && !apierrors.IsNotFound(err) {
+		log.Error(err, "failed to delete dbdaemon svc", "InstanceName", inst.Name)
+		return ctrl.Result{}, err
+	}
+	return ctrl.Result{}, nil
+}
+
+func (r *InstanceReconciler) deleteDBLoadBalancer(ctx context.Context, inst *v1alpha1.Instance, log logr.Logger) (ctrl.Result, error) {
+	if err := r.Delete(ctx, InstanceLB(*inst)); err != nil && !apierrors.IsNotFound(err) {
+		log.Error(err, "failed to delete load balancer", "InstanceName", inst.Name)
+		return ctrl.Result{}, err
+	}
+	return ctrl.Result{}, nil
+}
+
 // CreateDBLoadBalancer returns the service for the database.
 func (r *InstanceReconciler) createDBLoadBalancer(ctx context.Context, inst *v1alpha1.Instance, applyOpts []client.PatchOption) (*corev1.Service, error) {
 	sourceCidrRanges := []string{"0.0.0.0/0"}
@@ -228,7 +282,7 @@ func (r *InstanceReconciler) createDBLoadBalancer(ctx context.Context, inst *v1a
 	var svcAnnotations map[string]string
 
 	lbType := corev1.ServiceTypeLoadBalancer
-	svcNameFull := fmt.Sprintf(controllers.SvcName, inst.Name)
+	svcNameFull := getSVCName(*inst)
 	svcAnnotations = utils.LoadBalancerAnnotations(inst.Spec.DBLoadBalancerOptions)
 
 	svc := &corev1.Service{
@@ -1016,3 +1070,37 @@ func isObjectChanged(ctx context.Context, patch client.Patch, obj client.Object)
 	specOrMetaChanged = len(result) > 0 && !(len(result) == 1 && statusChanged)
 	return specOrMetaChanged, statusChanged, nil
 }
+
+func getSTSName(instance v1alpha1.Instance) string {
+	return fmt.Sprintf(controllers.StsName, instance.Name)
+}
+
+func getSVCName(instance v1alpha1.Instance) string {
+	return fmt.Sprintf(controllers.SvcName, instance.Name)
+}
+func InstanceLB(inst v1alpha1.Instance) *corev1.Service {
+	return &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: inst.GetNamespace(),
+			Name:      getSVCName(inst),
+		},
+	}
+}
+
+func AgentSVC(inst v1alpha1.Instance) *corev1.Service {
+	return &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: inst.GetNamespace(),
+			Name:      fmt.Sprintf(controllers.AgentSvcName, inst.Name),
+		},
+	}
+}
+
+func DbDaemonSVC(inst v1alpha1.Instance) *corev1.Service {
+	return &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: inst.GetNamespace(),
+			Name:      fmt.Sprintf(controllers.DbdaemonSvcName, inst.Name),
+		},
+	}
+}
diff --git a/oracle/controllers/inttest/instancetest/BUILD.bazel b/oracle/controllers/inttest/instancetest/BUILD.bazel
index 1cd8c86..8d4759a 100644
--- a/oracle/controllers/inttest/instancetest/BUILD.bazel
+++ b/oracle/controllers/inttest/instancetest/BUILD.bazel
@@ -25,6 +25,7 @@ ginkgo_test(
         "@io_k8s_client_go//plugin/pkg/client/auth/gcp",
         "@io_k8s_sigs_controller_runtime//pkg/client",
         "@io_k8s_sigs_controller_runtime//pkg/envtest/printer",
+        "@io_k8s_utils//pointer",
     ],
 )
diff --git a/oracle/controllers/inttest/instancetest/instance_test.go b/oracle/controllers/inttest/instancetest/instance_test.go
index da8bee8..dde3f28 100644
--- a/oracle/controllers/inttest/instancetest/instance_test.go
+++ b/oracle/controllers/inttest/instancetest/instance_test.go
@@ -25,9 +25,11 @@ import (
"github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest/printer" @@ -135,9 +137,46 @@ var _ = Describe("Instance and Database provisioning", func() { var svc corev1.ServiceList Expect(k8sClient.List(ctx, &svc, client.InNamespace(namespace))).Should(Succeed()) Expect(len(svc.Items)).Should(Equal(4)) // 2 services (LB, DBDaemon) per instance - By("By checking that the datadisk and log disk get resized") + By("By checking that the Instance can be stopped") testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute) createdInstance1 := &v1alpha1.Instance{} + testhelpers.K8sUpdateWithRetry(k8sEnv.K8sClient, k8sEnv.Ctx, + instKey1, + createdInstance1, + func(obj *client.Object) { + instanceToUpdate := (*obj).(*v1alpha1.Instance) + instanceToUpdate.Spec.IsStopped = pointer.Bool(true) + }) + testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.InstanceStopped, 5*time.Minute) + By("Checking that the sts replicas were scaled down to 0") + sts1 := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(controllers.StsName, createdInstance1.Name), + Namespace: namespace, + }, + } + testhelpers.K8sGetWithLongRetry(k8sClient, ctx, client.ObjectKeyFromObject(sts1), sts1) + + Expect(*sts1.Spec.Replicas).Should(Equal(int32(controllers.StoppedReplicaCnt))) + By("Checking that the instance can be started") + createdInstance1 = &v1alpha1.Instance{} + testhelpers.K8sUpdateWithRetry(k8sEnv.K8sClient, k8sEnv.Ctx, + instKey1, + createdInstance1, + func(obj *client.Object) { + instanceToUpdate := (*obj).(*v1alpha1.Instance) + instanceToUpdate.Spec.IsStopped = pointer.Bool(false) + }) + + By("Checking that the sts replicas were scaled up to 1") + testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute) + testhelpers.K8sGetWithLongRetry(k8sClient, ctx, client.ObjectKeyFromObject(sts1), sts1) + Expect(*sts1.Spec.Replicas).Should(Equal(int32(controllers.DefaultReplicaCnt))) + + By("By checking that the datadisk and log disk get resized") + testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute) + + createdInstance1 = &v1alpha1.Instance{} testhelpers.K8sUpdateWithRetry(k8sEnv.K8sClient, k8sEnv.Ctx, instKey1, createdInstance1, @@ -155,12 +194,6 @@ var _ = Describe("Instance and Database provisioning", func() { //wait for status propagation testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionFalse, k8s.ResizingInProgress, 5*time.Minute) testhelpers.WaitForInstanceConditionState(k8sEnv, instKey1, k8s.Ready, metav1.ConditionTrue, k8s.CreateComplete, 25*time.Minute) - sts1 := &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf(controllers.StsName, createdInstance1.Name), - Namespace: namespace, - }, - } testhelpers.K8sGetWithLongRetry(k8sClient, ctx, client.ObjectKeyFromObject(sts1), sts1) By("By Checking if DataDisk and Log Disk PVC is changed") dataDiskPVCName := getPVCName(createdInstance1.Name, sts1.Name, "DataDisk") diff --git a/oracle/controllers/resources.go b/oracle/controllers/resources.go index 1b8454e..72e7d2e 100644 
--- a/oracle/controllers/resources.go
+++ b/oracle/controllers/resources.go
@@ -54,6 +54,8 @@ const (
 	podInfoMemRequestSubPath = "request_memory"
 	dbContainerName          = "oracledb"
 	podInfoVolume            = "podinfo"
+	StoppedReplicaCnt        = 0
+	DefaultReplicaCnt        = 1
 )
 
 var (
@@ -162,7 +164,7 @@ func NewConfigMap(inst *v1alpha1.Instance, scheme *runtime.Scheme, cmName string
 
 // NewSts returns the statefulset for the database pod.
 func NewSts(sp StsParams, pvcs []corev1.PersistentVolumeClaim, podTemplate corev1.PodTemplateSpec) (*appsv1.StatefulSet, error) {
-	var replicas int32 = 1
+	var replicas int32 = DefaultReplicaCnt
 	sts := &appsv1.StatefulSet{
 		// It looks like the version needs to be explicitly set to avoid the
 		// "incorrect version specified in apply patch" error.
diff --git a/oracle/operator.yaml b/oracle/operator.yaml
index e312bd1..8ab06db 100644
--- a/oracle/operator.yaml
+++ b/oracle/operator.yaml
@@ -2047,6 +2047,9 @@ spec:
               an optional map that allows a customer to specify GCR images different
               from those chosen/provided.
             type: object
+          isStopped:
+            description: IsStopped is true if an instance is stopped, false otherwise
+            type: boolean
           maintenanceWindow:
             description: MaintenanceWindow specifies the time windows during which
               database downtimes are allowed for maintenance.
@@ -2974,6 +2977,7 @@ rules:
   - services
   verbs:
   - create
+  - delete
   - get
   - list
   - patch
diff --git a/oracle/pkg/k8s/condition.go b/oracle/pkg/k8s/condition.go
index 1ee694c..4511479 100644
--- a/oracle/pkg/k8s/condition.go
+++ b/oracle/pkg/k8s/condition.go
@@ -32,6 +32,7 @@ const (
 	StandbyReady    = "StandbyReady"
 	PauseMode       = "Pause"
 	StandbyDRReady  = "StandbyDRReady"
+	InstanceStopped = "InstanceStopped"
 
 	// Condition Reasons
 	// Backup schedule concurrent policy is relying on the backup ready condition’s reason,
@@ -70,6 +71,10 @@ const (
 	PromoteStandbyComplete = "PromoteStandbyComplete"
 	PromoteStandbyFailed   = "PromoteStandbyFailed"
 
+	DBShutDown = "DBShutdown"
+
+	InstanceStoppingInProgress = "InstanceStoppingInProgress"
+
 	ExportComplete   = "ExportComplete"
 	ExportFailed     = "ExportFailed"
 	ExportInProgress = "ExportInProgress"
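Usage sketch (not part of the patch): with the CRD field added above, stopping an instance becomes a declarative operation on the Instance resource. A minimal manifest under that assumption follows; the metadata values are illustrative, only the group/kind and the isStopped field come from this change:

apiVersion: oracle.db.anthosapis.com/v1alpha1
kind: Instance
metadata:
  name: mydb        # illustrative name
  namespace: db     # illustrative namespace
spec:
  isStopped: true   # controller scales the DB StatefulSet to 0 and deletes the LB, dbdaemon, and agent services

Setting isStopped back to false (or removing it) moves the Ready condition from InstanceStopped through CreateInProgress back to CreateComplete, as exercised by the integration test above.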