Skip to content

Commit

Permalink
Checks k8s-related port availability in PreInitChecks
Browse files Browse the repository at this point in the history
PreInitChecks is called on bootstrap or when joining another Kubernetes
cluster. Kubernetes and its services open up several ports; if they're
already in use, we cannot progress.

Adding these checks makes these failures easier for the user to diagnose:
they get a specific port-in-use error instead of a generic bootstrap / join error.
  • Loading branch information
claudiubelu committed Nov 26, 2024
1 parent 946a94b commit c391136
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 12 deletions.
4 changes: 2 additions & 2 deletions src/k8s/pkg/k8sd/app/hooks_bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ func (a *App) onBootstrapWorkerNode(ctx context.Context, s state.State, encodedT
}

// Pre-init checks
if err := snap.PreInitChecks(ctx, cfg); err != nil {
if err := snap.PreInitChecks(ctx, cfg, false); err != nil {
return fmt.Errorf("pre-init checks failed for worker node: %w", err)
}

Expand Down Expand Up @@ -420,7 +420,7 @@ func (a *App) onBootstrapControlPlane(ctx context.Context, s state.State, bootst
cfg.Certificates.K8sdPrivateKey = utils.Pointer(certificates.K8sdPrivateKey)

// Pre-init checks
if err := snap.PreInitChecks(ctx, cfg); err != nil {
if err := snap.PreInitChecks(ctx, cfg, true); err != nil {
return fmt.Errorf("pre-init checks failed for bootstrap node: %w", err)
}

Expand Down
2 changes: 1 addition & 1 deletion src/k8s/pkg/k8sd/app/hooks_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ func (a *App) onPostJoin(ctx context.Context, s state.State, initConfig map[stri
}

// Pre-init checks
if err := snap.PreInitChecks(ctx, cfg); err != nil {
if err := snap.PreInitChecks(ctx, cfg, true); err != nil {
return fmt.Errorf("pre-init checks failed for joining node: %w", err)
}

Expand Down
2 changes: 1 addition & 1 deletion src/k8s/pkg/snap/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,5 @@ type Snap interface {

K8sdClient(address string) (k8sd.Client, error) // k8sd client

PreInitChecks(ctx context.Context, config types.ClusterConfig) error // pre-init checks before k8s-snap can start
PreInitChecks(ctx context.Context, config types.ClusterConfig, isControlPlane bool) error // pre-init checks before k8s-snap can start
}
2 changes: 1 addition & 1 deletion src/k8s/pkg/snap/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func (s *Snap) SnapctlSet(ctx context.Context, args ...string) error {
return s.SnapctlSetErr
}

func (s *Snap) PreInitChecks(ctx context.Context, config types.ClusterConfig) error {
// PreInitChecks is the mock pre-init check. It appends the received config to
// PreInitChecksCalledWith and returns PreInitChecksErr. The ctx and
// isControlPlane arguments are accepted but neither used nor recorded.
func (s *Snap) PreInitChecks(ctx context.Context, config types.ClusterConfig, isControlPlane bool) error {
	recorded := append(s.PreInitChecksCalledWith, config)
	s.PreInitChecksCalledWith = recorded
	return s.PreInitChecksErr
}
Expand Down
45 changes: 41 additions & 4 deletions src/k8s/pkg/snap/snap.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,11 +328,11 @@ func (s *snap) SnapctlSet(ctx context.Context, args ...string) error {
return s.runCommand(ctx, append([]string{"snapctl", "set"}, args...))
}

func (s *snap) PreInitChecks(ctx context.Context, config types.ClusterConfig) error {
// TODO: check for available ports for k8s-dqlite, apiserver, containerd, etc
func (s *snap) PreInitChecks(ctx context.Context, config types.ClusterConfig, isControlPlane bool) error {
if err := checkK8sServicePorts(config, isControlPlane); err != nil {
return fmt.Errorf("Encountered error(s) while verifying port availability for Kubernetes services: %w", err)
}

// NOTE(neoaggelos): in some environments the Kubernetes might hang when running for the first time
// This works around the issue by running them once during the install hook
for _, binary := range []string{"kube-apiserver", "kube-controller-manager", "kube-scheduler", "kube-proxy", "kubelet"} {
if err := s.runCommand(ctx, []string{filepath.Join(s.snapDir, "bin", binary), "--version"}); err != nil {
return fmt.Errorf("%q binary could not run: %w", binary, err)
Expand All @@ -354,4 +354,41 @@ func (s *snap) PreInitChecks(ctx context.Context, config types.ClusterConfig) er
return nil
}

// checkK8sServicePorts verifies that the local ports needed by the Kubernetes
// services of this node are not already in use.
//
// Every node checks the kubelet and kube-proxy ports, plus the k8s-dqlite and
// load balancer BGP peer ports taken from config. When isControlPlane is true
// the kube-apiserver, kube-scheduler and kube-controller-manager ports are
// checked as well; otherwise the API server secure port is checked on behalf
// of the kube-apiserver-proxy. Ports that resolve to 0 are skipped.
//
// All ports are always checked. Every failure (port in use, or the check
// itself failing) is collected and the result is returned as one joined
// error, which is nil when every port is free.
func checkK8sServicePorts(config types.ClusterConfig, isControlPlane bool) error {
	type servicePort struct {
		service string
		port    int
	}

	// A slice (rather than a map) keeps the check order, and therefore the
	// order of the reported errors, identical between runs.
	required := []servicePort{
		// Default values from official Kubernetes documentation.
		{service: "kubelet", port: 10250},
		{service: "kubelet-healthz", port: 10248},
		{service: "kube-proxy-healthz", port: 10256},
		{service: "kube-proxy-metrics", port: 10249},
		{service: "k8s-dqlite", port: config.Datastore.GetK8sDqlitePort()},
		// NOTE(review): a BGP peer port may describe the remote peer rather than
		// a local listener — confirm this port really has to be free locally.
		{service: "loadbalancer", port: config.LoadBalancer.GetBGPPeerPort()},
	}

	if isControlPlane {
		required = append(required,
			servicePort{service: "kube-apiserver", port: config.APIServer.GetSecurePort()},
			servicePort{service: "kube-scheduler", port: 10259},
			servicePort{service: "kube-controller-manager", port: 10257},
		)
	} else {
		required = append(required,
			servicePort{service: "kube-apiserver-proxy", port: config.APIServer.GetSecurePort()},
		)
	}

	var allErrors []error
	for _, p := range required {
		if p.port == 0 {
			// No port configured for this service, nothing to check.
			continue
		}

		inUse, err := utils.IsLocalPortOpen(p.port)
		switch {
		case err != nil:
			// The check itself failed, so the state of the port is unknown.
			allErrors = append(allErrors, fmt.Errorf("could not check port %d (needed by: %s): %w", p.port, p.service, err))
		case inUse:
			allErrors = append(allErrors, fmt.Errorf("port %d (needed by: %s) is already in use", p.port, p.service))
		}
	}

	return errors.Join(allErrors...)
}

var _ Snap = &snap{}
18 changes: 15 additions & 3 deletions src/k8s/pkg/snap/snap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package snap_test
import (
"context"
"fmt"
"net"
"os"
"path/filepath"
"testing"
Expand Down Expand Up @@ -142,14 +143,25 @@ func TestSnap(t *testing.T) {
})
conf := types.ClusterConfig{}

err = snap.PreInitChecks(context.Background(), conf)
err = snap.PreInitChecks(context.Background(), conf, true)
g.Expect(err).To(Not(HaveOccurred()))
expectedCalls := []string{}
for _, binary := range []string{"kube-apiserver", "kube-controller-manager", "kube-scheduler", "kube-proxy", "kubelet"} {
expectedCalls = append(expectedCalls, fmt.Sprintf("testdir/bin/%s --version", binary))
}
g.Expect(mockRunner.CalledWithCommand).To(ConsistOf(expectedCalls))

t.Run("Fail port already in use", func(t *testing.T) {
g := NewWithT(t)
// Open a port which will be checked (kubelet).
l, err := net.Listen("tcp", ":10250")
g.Expect(err).To(Not(HaveOccurred()))
defer l.Close()

err = snap.PreInitChecks(context.Background(), conf, true)
g.Expect(err).To(HaveOccurred())
})

t.Run("Fail socket exists", func(t *testing.T) {
g := NewWithT(t)
// Create the containerd.sock file, which should cause the check to fail.
Expand All @@ -160,15 +172,15 @@ func TestSnap(t *testing.T) {
f.Close()
defer os.Remove(f.Name())

err = snap.PreInitChecks(context.Background(), conf)
err = snap.PreInitChecks(context.Background(), conf, true)
g.Expect(err).To(HaveOccurred())
})

t.Run("Fail run command", func(t *testing.T) {
g := NewWithT(t)
mockRunner.Err = fmt.Errorf("some error")

err := snap.PreInitChecks(context.Background(), conf)
err := snap.PreInitChecks(context.Background(), conf, true)
g.Expect(err).To(HaveOccurred())
})
})
Expand Down
31 changes: 31 additions & 0 deletions src/k8s/pkg/utils/net.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package utils

import (
"errors"
"net"
"os"
"strconv"
"syscall"
"time"
)

// IsLocalPortOpen reports whether a TCP connection to the given port on
// "localhost" can be established, i.e. whether something is already
// listening there.
//
// It returns (true, nil) when the connection succeeds, (false, nil) when the
// connection is refused or the 500ms dial timeout expires, and (false, err)
// when the check fails for any other reason and the state of the port could
// not be determined.
func IsLocalPortOpen(port int) (bool, error) {
	err := checkPort("localhost", port, 500*time.Millisecond)

	var netErr net.Error
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, syscall.ECONNREFUSED), errors.Is(err, os.ErrDeadlineExceeded):
		return false, nil
	case errors.As(err, &netErr) && netErr.Timeout():
		// A dial that hits its deadline is not always reported as
		// os.ErrDeadlineExceeded; the net package may return its own timeout
		// error instead, which is only recognizable through Timeout().
		return false, nil
	default:
		// could not open due to error, couldn't check.
		return false, err
	}
}

// checkPort attempts a TCP connection to host:port, giving up after timeout.
// It returns nil when the connection was established (the connection is
// closed again right away) and the dial error otherwise.
func checkPort(host string, port int, timeout time.Duration) error {
	target := net.JoinHostPort(host, strconv.Itoa(port))
	dialer := net.Dialer{Timeout: timeout}

	conn, dialErr := dialer.Dial("tcp", target)
	if dialErr == nil {
		conn.Close()
	}
	return dialErr
}

0 comments on commit c391136

Please sign in to comment.