From a443e95332a5f553f1095a47b5f483bee3a032b5 Mon Sep 17 00:00:00 2001 From: Claudiu Belu Date: Mon, 16 Dec 2024 17:13:47 +0000 Subject: [PATCH] Cleans up files and removes k8s-dqlite on remove hook cfg.Datastore.GetType() may return an empty string if the bootstrap action failed before database.SetClusterConfig has been called. Because of this, we're not doing the proper clean up in the remove hook. We're now opportunistically cleaning up the k8s-dqlite related resources / external datastore certificates. Additionally, in the remove hook, we're ensuring that the PKI files exist, instead of removing them, contrary to what the log messages would also suggest. This addresses this issue as well. --- src/k8s/pkg/k8sd/app/hooks_remove.go | 48 +++++++++++------------- src/k8s/pkg/k8sd/setup/certificates.go | 51 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 26 deletions(-) diff --git a/src/k8s/pkg/k8sd/app/hooks_remove.go b/src/k8s/pkg/k8sd/app/hooks_remove.go index 4e90e6c053..e70dd45258 100644 --- a/src/k8s/pkg/k8sd/app/hooks_remove.go +++ b/src/k8s/pkg/k8sd/app/hooks_remove.go @@ -10,7 +10,6 @@ import ( apiv1_annotations "github.com/canonical/k8s-snap-api/api/v1/annotations" databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util" - "github.com/canonical/k8s/pkg/k8sd/pki" "github.com/canonical/k8s/pkg/k8sd/setup" "github.com/canonical/k8s/pkg/log" "github.com/canonical/k8s/pkg/snap" @@ -77,31 +76,28 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr } } - switch cfg.Datastore.GetType() { - case "k8s-dqlite": - client, err := snap.K8sDqliteClient(ctx) - if err == nil { - log.Info("Removing node from k8s-dqlite cluster") - nodeAddress := net.JoinHostPort(s.Address().Hostname(), fmt.Sprintf("%d", cfg.Datastore.GetK8sDqlitePort())) - if err := client.RemoveNodeByAddress(ctx, nodeAddress); err != nil { - // Removing the node might fail (e.g. if it is the only one in the cluster). - // We still want to continue with the file cleanup, hence we only log the error. - log.Error(err, "Failed to remove node from k8s-dqlite cluster") - } - } else { - log.Error(err, "Failed to create k8s-dqlite client: %w") + // opportunistically cleanup for both k8s-dqlite and external datastore. + client, err := snap.K8sDqliteClient(ctx) + if err == nil { + log.Info("Removing node from k8s-dqlite cluster") + nodeAddress := net.JoinHostPort(s.Address().Hostname(), fmt.Sprintf("%d", cfg.Datastore.GetK8sDqlitePort())) + if err := client.RemoveNodeByAddress(ctx, nodeAddress); err != nil { + // Removing the node might fail (e.g. if it is the only one in the cluster). + // We still want to continue with the file cleanup, hence we only log the error. + log.Error(err, "Failed to remove node from k8s-dqlite cluster") } + } else { + log.Error(err, "Failed to create k8s-dqlite client: %w") + } - log.Info("Cleaning up k8s-dqlite directory") - if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { - return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) - } - case "external": - log.Info("Cleaning up external datastore certificates") - if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil { - log.Error(err, "Failed to cleanup external datastore certificates") - } - default: + log.Info("Cleaning up k8s-dqlite directory") + if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { + return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) + } + + log.Info("Cleaning up external datastore certificates") + if err := setup.RemoveExtDatastorePKIFiles(snap); err != nil { + log.Error(err, "Failed to cleanup external datastore certificates") } } else { log.Error(err, "Failed to retrieve cluster config") @@ -118,7 +114,7 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr // Trying to detect the node type is not reliable as the node might have been marked as worker // or not, depending on which step it failed. log.Info("Cleaning up worker certificates") - if _, err := setup.EnsureWorkerPKI(snap, &pki.WorkerNodePKI{}); err != nil { + if err := setup.RemoveWorkerPKIFiles(snap); err != nil { log.Error(err, "failed to cleanup worker certificates") } @@ -130,7 +126,7 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr } log.Info("Cleaning up control plane certificates") - if _, err := setup.EnsureControlPlanePKI(snap, &pki.ControlPlanePKI{}); err != nil { + if err := setup.RemoveControlNodePKIFiles(snap); err != nil { log.Error(err, "failed to cleanup control plane certificates") } diff --git a/src/k8s/pkg/k8sd/setup/certificates.go b/src/k8s/pkg/k8sd/setup/certificates.go index c8e67d227a..53422f850c 100644 --- a/src/k8s/pkg/k8sd/setup/certificates.go +++ b/src/k8s/pkg/k8sd/setup/certificates.go @@ -70,6 +70,15 @@ func ensureFiles(uid, gid int, mode fs.FileMode, files map[string]string) (bool, return changed, nil } +func removeFiles(files []string) error { + for fname := range files { + if err := os.Remove(fname); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to remove %s: %w", fname, err) + } + } + return nil +} + // EnsureExtDatastorePKI ensures the external datastore PKI files are present // and have the correct content, permissions and ownership. // It returns true if one or more files were updated and any error that occurred. @@ -81,6 +90,16 @@ func EnsureExtDatastorePKI(snap snap.Snap, certificates *pki.ExternalDatastorePK }) } +// RemoveExtDatastorePKIFiles removes the external datastore PKI files. +func RemoveExtDatastorePKIFiles(snap snap.Snap) error { + files := []string{ + filepath.Join(snap.EtcdPKIDir(), "ca.crt"), + filepath.Join(snap.EtcdPKIDir(), "client.key"), + filepath.Join(snap.EtcdPKIDir(), "client.crt"), + } + return removeFiles(files) +} + // EnsureK8sDqlitePKI ensures the k8s dqlite PKI files are present // and have the correct content, permissions and ownership. // It returns true if one or more files were updated and any error that occurred. @@ -113,6 +132,27 @@ func EnsureControlPlanePKI(snap snap.Snap, certificates *pki.ControlPlanePKI) (b }) } +// RemoveControlNodePKIFiles removes the control plane PKI files. +func RemoveControlNodePKIFiles(snap snap.Snap) error { + files := []string{ + filepath.Join(snap.KubernetesPKIDir(), "apiserver-kubelet-client.crt"), + filepath.Join(snap.KubernetesPKIDir(), "apiserver-kubelet-client.key"), + filepath.Join(snap.KubernetesPKIDir(), "apiserver.crt"), + filepath.Join(snap.KubernetesPKIDir(), "apiserver.key"), + filepath.Join(snap.KubernetesPKIDir(), "ca.crt"), + filepath.Join(snap.KubernetesPKIDir(), "client-ca.crt"), + filepath.Join(snap.KubernetesPKIDir(), "ca.key"), + filepath.Join(snap.KubernetesPKIDir(), "front-proxy-ca.crt"), + filepath.Join(snap.KubernetesPKIDir(), "front-proxy-ca.key"), + filepath.Join(snap.KubernetesPKIDir(), "front-proxy-client.crt"), + filepath.Join(snap.KubernetesPKIDir(), "front-proxy-client.key"), + filepath.Join(snap.KubernetesPKIDir(), "kubelet.crt"), + filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"), + filepath.Join(snap.KubernetesPKIDir(), "serviceaccount.key"), + } + return removeFiles(files) +} + // EnsureWorkerPKI ensures the worker PKI files are present // and have the correct content, permissions and ownership. // It returns true if one or more files were updated and any error that occurred. @@ -124,3 +164,14 @@ func EnsureWorkerPKI(snap snap.Snap, certificates *pki.WorkerNodePKI) (bool, err filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"): certificates.KubeletKey, }) } + +// RemoveWorkerPKIFiles removes the worker PKI files. +func RemoveWorkerPKIFiles(snap snap.Snap) error { + files := []string{ + filepath.Join(snap.KubernetesPKIDir(), "ca.crt"), + filepath.Join(snap.KubernetesPKIDir(), "client-ca.crt"), + filepath.Join(snap.KubernetesPKIDir(), "kubelet.crt"), + filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"), + } + return removeFiles(files) +}