diff --git a/src/k8s/go.mod b/src/k8s/go.mod index 6ec6e3204..ca4a888d6 100644 --- a/src/k8s/go.mod +++ b/src/k8s/go.mod @@ -5,7 +5,7 @@ go 1.22.6 require ( dario.cat/mergo v1.0.0 github.com/canonical/go-dqlite v1.22.0 - github.com/canonical/k8s-snap-api v1.0.7 + github.com/canonical/k8s-snap-api v1.0.8 github.com/canonical/lxd v0.0.0-20240822122218-e7b2a7a83230 github.com/canonical/microcluster/v3 v3.0.0-20240827143335-f7a4d3984970 github.com/go-logr/logr v1.4.2 diff --git a/src/k8s/go.sum b/src/k8s/go.sum index ee150fa8a..8e5154068 100644 --- a/src/k8s/go.sum +++ b/src/k8s/go.sum @@ -99,8 +99,8 @@ github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0 h1:nvj0OLI3YqYXe github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= github.com/canonical/go-dqlite v1.22.0 h1:DuJmfcREl4gkQJyvZzjl2GHFZROhbPyfdjDRQXpkOyw= github.com/canonical/go-dqlite v1.22.0/go.mod h1:Uvy943N8R4CFUAs59A1NVaziWY9nJ686lScY7ywurfg= -github.com/canonical/k8s-snap-api v1.0.7 h1:40qz+9IcV90ZN/wTMuOraZcuqoyRHaJck1J3c7FcWrQ= -github.com/canonical/k8s-snap-api v1.0.7/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= +github.com/canonical/k8s-snap-api v1.0.8 h1:W360Y4ulkAdCdQqbfQ7zXs3/Ty8SWENO3/Bzz8ZAEPE= +github.com/canonical/k8s-snap-api v1.0.8/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= github.com/canonical/lxd v0.0.0-20240822122218-e7b2a7a83230 h1:YOqZ+/14OPZ+/TOXpRHIX3KLT0C+wZVpewKIwlGUmW0= github.com/canonical/lxd v0.0.0-20240822122218-e7b2a7a83230/go.mod h1:YVGI7HStOKsV+cMyXWnJ7RaMPaeWtrkxyIPvGWbgACc= github.com/canonical/microcluster/v3 v3.0.0-20240827143335-f7a4d3984970 h1:UrnpglbXELlxtufdk6DGDytu2JzyzuS3WTsOwPrkQLI= diff --git a/src/k8s/pkg/k8sd/api/certificates_refresh.go b/src/k8s/pkg/k8sd/api/certificates_refresh.go index 4e3694114..1be0f436f 100644 --- a/src/k8s/pkg/k8sd/api/certificates_refresh.go +++ b/src/k8s/pkg/k8sd/api/certificates_refresh.go @@ -1,6 +1,7 @@ package api import ( + "context" "crypto/x509/pkix" "fmt" "math" @@ -66,6 +67,8 @@ func (e *Endpoints) postRefreshCertsRun(s state.State, r *http.Request) response // refreshCertsRunControlPlane refreshes the certificates for a control plane node. func refreshCertsRunControlPlane(s state.State, r *http.Request, snap snap.Snap) response.Response { + log := log.FromContext(r.Context()) + req := apiv1.RefreshCertificatesRunRequest{} if err := utils.NewStrictJSONDecoder(r.Body).Decode(&req); err != nil { return response.BadRequest(fmt.Errorf("failed to parse request: %w", err)) @@ -130,20 +133,54 @@ func refreshCertsRunControlPlane(s state.State, r *http.Request, snap snap.Snap) return response.InternalError(fmt.Errorf("failed to generate control plane kubeconfigs: %w", err)) } - if err := snaputil.RestartControlPlaneServices(r.Context(), snap); err != nil { - return response.InternalError(fmt.Errorf("failed to restart control plane services: %w", err)) - } + // NOTE: Restart the control plane services in a separate goroutine to avoid + // restarting the API server, which would break the k8sd proxy connection + // and cause missed responses in the proxy side. + readyCh := make(chan error) + go func() { + // NOTE: Create a new context independent of the request context to ensure + // the restart process is not cancelled by the client. + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() - kubeletCert, _, err := pkiutil.LoadCertificate(certificates.KubeletCert, "") + if err := <-readyCh; err != nil { + log.Error(err, "Failed to refresh certificates") + return + } + if err := snaputil.RestartControlPlaneServices(ctx, snap); err != nil { + log.Error(err, "Failed to restart control plane services") + } + }() + + apiServerCert, _, err := pkiutil.LoadCertificate(certificates.APIServerCert, "") if err != nil { return response.InternalError(fmt.Errorf("failed to read kubelet certificate: %w", err)) } - expirationTimeUNIX := kubeletCert.NotAfter.Unix() + expirationTimeUNIX := apiServerCert.NotAfter.Unix() - return response.SyncResponse(true, apiv1.RefreshCertificatesRunResponse{ - ExpirationSeconds: int(expirationTimeUNIX), + return response.ManualResponse(func(w http.ResponseWriter) (rerr error) { + defer func() { + readyCh <- rerr + close(readyCh) + }() + + err := response.SyncResponse(true, apiv1.RefreshCertificatesRunResponse{ + ExpirationSeconds: int(expirationTimeUNIX), + }).Render(w) + if err != nil { + return fmt.Errorf("failed to render response: %w", err) + } + + f, ok := w.(http.Flusher) + if !ok { + return fmt.Errorf("ResponseWriter is not type http.Flusher") + } + + f.Flush() + return nil }) + } // refreshCertsRunWorker refreshes the certificates for a worker node diff --git a/src/k8s/pkg/k8sd/api/endpoints.go b/src/k8s/pkg/k8sd/api/endpoints.go index 3bc56701e..f4a02e02c 100644 --- a/src/k8s/pkg/k8sd/api/endpoints.go +++ b/src/k8s/pkg/k8sd/api/endpoints.go @@ -91,12 +91,12 @@ func (e *Endpoints) Endpoints() []rest.Endpoint { // Certificates { Name: "RefreshCerts/Plan", - Path: "k8sd/refresh-certs/plan", + Path: apiv1.RefreshCertificatesPlanRPC, Post: rest.EndpointAction{Handler: e.postRefreshCertsPlan}, }, { Name: "RefreshCerts/Run", - Path: "k8sd/refresh-certs/run", + Path: apiv1.RefreshCertificatesRunRPC, Post: rest.EndpointAction{Handler: e.postRefreshCertsRun}, }, // Kubeconfig @@ -143,7 +143,17 @@ func (e *Endpoints) Endpoints() []rest.Endpoint { { Name: "ClusterAPI/CertificatesExpiry", Path: apiv1.ClusterAPICertificatesExpiryRPC, - Post: rest.EndpointAction{Handler: e.postCertificatesExpiry, AccessHandler: ValidateCAPIAuthTokenAccessHandler("capi-auth-token"), AllowUntrusted: true}, + Post: rest.EndpointAction{Handler: e.postCertificatesExpiry, AccessHandler: e.ValidateNodeTokenAccessHandler("node-token"), AllowUntrusted: true}, + }, + { + Name: "ClusterAPI/RefreshCerts/Plan", + Path: apiv1.ClusterAPICertificatesPlanRPC, + Post: rest.EndpointAction{Handler: e.postRefreshCertsPlan, AccessHandler: e.ValidateNodeTokenAccessHandler("node-token"), AllowUntrusted: true}, + }, + { + Name: "ClusterAPI/RefreshCerts/Run", + Path: apiv1.ClusterAPICertificatesRunRPC, + Post: rest.EndpointAction{Handler: e.postRefreshCertsRun, AccessHandler: e.ValidateNodeTokenAccessHandler("node-token"), AllowUntrusted: true}, }, // Snap refreshes { diff --git a/tests/integration/tests/test_smoke.py b/tests/integration/tests/test_smoke.py index 9d8dd6da1..ab2ee7552 100644 --- a/tests/integration/tests/test_smoke.py +++ b/tests/integration/tests/test_smoke.py @@ -66,6 +66,7 @@ def test_smoke(instances: List[harness.Instance]): LOG.info("Verify the functionality of the CAPI endpoints.") instance.exec("k8s x-capi set-auth-token my-secret-token".split()) + instance.exec("k8s x-capi set-node-token my-node-token".split()) body = { "name": "my-node", @@ -107,7 +108,7 @@ def test_smoke(instances: List[harness.Instance]): "-H", "Content-Type: application/json", "-H", - "capi-auth-token: my-secret-token", + "node-token: my-node-token", "--unix-socket", "/var/snap/k8s/common/var/lib/k8sd/state/control.socket", "http://localhost/1.0/x/capi/certificates-expiry",