From 589184a0f41a3fef92905230893e6522c1b1474c Mon Sep 17 00:00:00 2001 From: Claudiu Belu Date: Mon, 16 Dec 2024 17:13:47 +0000 Subject: [PATCH] Cleans up k8s-dqlite state dir and stops it on remove hook cfg.Datastore.GetType() may return an empty string if the bootstrap action failed before database.SetClusterConfig has been called. Because of this, we're not removing the state dir for k8s-dqlite, which will be wrongfully removed by setup.K8sDqlite on the next bootstrap attempt. We're now opportunistically cleaning up the k8s-dqlite-related state directory. If a bootstrap attempt fails, the k8s-dqlite service will still be running, which will cause the next bootstrap attempt to fail, as the k8s-dqlite port will already be in use. The remove hook now also stops the k8s-dqlite service. --- src/k8s/pkg/k8sd/app/cluster_util.go | 18 ------------------ src/k8s/pkg/k8sd/app/hooks_remove.go | 13 +++++++++---- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/k8s/pkg/k8sd/app/cluster_util.go b/src/k8s/pkg/k8sd/app/cluster_util.go index 97435d2237..8048a5789c 100644 --- a/src/k8s/pkg/k8sd/app/cluster_util.go +++ b/src/k8s/pkg/k8sd/app/cluster_util.go @@ -29,24 +29,6 @@ func startControlPlaneServices(ctx context.Context, snap snap.Snap, datastore st return nil } -func stopControlPlaneServices(ctx context.Context, snap snap.Snap, datastore string) error { - // Stop services - switch datastore { - case "k8s-dqlite": - if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop k8s-dqlite service: %w", err) - } - case "external": - default: - return fmt.Errorf("unsupported datastore %s, must be one of %v", datastore, setup.SupportedDatastores) - } - - if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop control plane services: %w", err) - } - return nil -} - func waitApiServerReady(ctx context.Context, snap snap.Snap) error { // Wait for API server to come up client, err := snap.KubernetesClient("") diff --git 
a/src/k8s/pkg/k8sd/app/hooks_remove.go b/src/k8s/pkg/k8sd/app/hooks_remove.go index 4e90e6c053..c7548af60e 100644 --- a/src/k8s/pkg/k8sd/app/hooks_remove.go +++ b/src/k8s/pkg/k8sd/app/hooks_remove.go @@ -92,10 +92,6 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr log.Error(err, "Failed to create k8s-dqlite client: %w") } - log.Info("Cleaning up k8s-dqlite directory") - if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { - return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) - } case "external": log.Info("Cleaning up external datastore certificates") if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil { @@ -107,6 +103,10 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr log.Error(err, "Failed to retrieve cluster config") } + log.Info("Cleaning up k8s-dqlite directory") + if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { + return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) + } for _, dir := range []string{snap.ServiceArgumentsDir()} { log.WithValues("directory", dir).Info("Cleaning up config files", dir) if err := os.RemoveAll(dir); err != nil { @@ -144,6 +144,11 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil { log.Error(err, "Failed to stop control-plane services") } + + log.Info("Stopping k8s-dqlite") + if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { + log.Error(err, "Failed to stop k8s-dqlite service") + } } tryCleanupContainerdPaths(log, snap)