From 91f29ec9cd17f7eb21842663956d6b12946afc59 Mon Sep 17 00:00:00 2001 From: Claudiu Belu Date: Fri, 10 Jan 2025 12:01:31 +0200 Subject: [PATCH] Cleans up k8s-dqlite state dir and stops it on remove hook (#908) cfg.Datastore.GetType() may return an empty string if the bootstrap action failed before database.SetClusterConfig has been called. Because of this, we're not removing the state dir for k8s-dqlite, which will be wrongfully removed by setup.K8sDqlite on the next bootstrap attempt. We're now opportunistically cleaning up the k8s-dqlite state directory in the remove hook regardless of the reported datastore type, and a failure to remove it is logged instead of aborting the hook. If a bootstrap attempt fails, the k8s-dqlite service will still be running, which will cause the next bootstrap attempt to fail, as the k8s-dqlite port will still be in use. The remove hook now also stops the k8s-dqlite service after stopping the control-plane services. --- src/k8s/pkg/k8sd/app/cluster_util.go | 18 ------------------ src/k8s/pkg/k8sd/app/hooks_remove.go | 13 +++++++++---- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/k8s/pkg/k8sd/app/cluster_util.go b/src/k8s/pkg/k8sd/app/cluster_util.go index 97435d223..8048a5789 100644 --- a/src/k8s/pkg/k8sd/app/cluster_util.go +++ b/src/k8s/pkg/k8sd/app/cluster_util.go @@ -29,24 +29,6 @@ func startControlPlaneServices(ctx context.Context, snap snap.Snap, datastore st return nil } -func stopControlPlaneServices(ctx context.Context, snap snap.Snap, datastore string) error { - // Stop services - switch datastore { - case "k8s-dqlite": - if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop k8s-dqlite service: %w", err) - } - case "external": - default: - return fmt.Errorf("unsupported datastore %s, must be one of %v", datastore, setup.SupportedDatastores) - } - - if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop control plane services: %w", err) - } - return nil -} - func waitApiServerReady(ctx context.Context, snap snap.Snap) error { // Wait for API server to come up client, err := snap.KubernetesClient("") diff --git 
a/src/k8s/pkg/k8sd/app/hooks_remove.go b/src/k8s/pkg/k8sd/app/hooks_remove.go index 567d6d350..f76c2ee57 100644 --- a/src/k8s/pkg/k8sd/app/hooks_remove.go +++ b/src/k8s/pkg/k8sd/app/hooks_remove.go @@ -93,10 +93,6 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr log.Error(err, "Failed to create k8s-dqlite client: %w") } - log.Info("Cleaning up k8s-dqlite directory") - if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { - return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) - } case "external": log.Info("Cleaning up external datastore certificates") if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil { @@ -108,6 +104,10 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr log.Error(err, "Failed to retrieve cluster config") } + log.Info("Cleaning up k8s-dqlite directory") + if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { + log.Error(err, "failed to cleanup k8s-dqlite state directory") + } for _, dir := range []string{snap.ServiceArgumentsDir()} { log.WithValues("directory", dir).Info("Cleaning up config files", dir) if err := os.RemoveAll(dir); err != nil { @@ -145,6 +145,11 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil { log.Error(err, "Failed to stop control-plane services") } + + log.Info("Stopping k8s-dqlite") + if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { + log.Error(err, "Failed to stop k8s-dqlite service") + } } tryCleanupContainerdPaths(log, snap)