Skip to content

Commit

Permalink
fix: ensure containerd-related directories removed on failed `bootstr…
Browse files Browse the repository at this point in the history
…ap/join-cluster`.

integration: add separate `test_util.check_file_paths_exist()` function.

integration: make `test_util.open_port()` call `socket.listen()` itself.

fix: minor refactoring following PR feedback.

Signed-off-by: Nashwan Azhari <[email protected]>
  • Loading branch information
Nashwan Azhari committed Dec 10, 2024
1 parent dd6b5e7 commit 80cb072
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 18 deletions.
46 changes: 46 additions & 0 deletions src/k8s/pkg/k8sd/app/hooks_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/canonical/k8s/pkg/k8sd/pki"
"github.com/canonical/k8s/pkg/k8sd/setup"
"github.com/canonical/k8s/pkg/log"
"github.com/canonical/k8s/pkg/snap"
snaputil "github.com/canonical/k8s/pkg/snap/util"
"github.com/canonical/k8s/pkg/utils/control"
"github.com/canonical/microcluster/v2/cluster"
Expand Down Expand Up @@ -145,5 +146,50 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
}
}

tryCleanupContainerdPaths(snap)

return nil
}

// tryCleanupContainerdPaths attempts to clean up all containerd directories which were
// created by the k8s-snap based on the existence of their respective lockfiles
// located in the directory returned by `s.LockFilesDir()`.
func tryCleanupContainerdPaths(s snap.Snap) {
log := log.L()

for lockpath, dirpath := range setup.ContainerdLockPathsForSnap(s) {
// Ensure lockfile exists:
if _, err := os.Stat(lockpath); os.IsNotExist(err) {
log.Info("WARN: failed to find containerd lockfile, no cleanup will be perfomed", "lockfile", lockpath, "directory", dirpath)
continue
}

// Ensure lockfile's contents is the one we expect:
lockfile_contents := ""
if contents, err := os.ReadFile(lockpath); err != nil {
log.Info("WARN: failed to read contents of lockfile", "lockfile", lockpath, "error", err)
continue
} else {
lockfile_contents = string(contents)
}

if lockfile_contents != dirpath {
log.Info("WARN: lockfile points to different path than expected", "lockfile", lockpath, "expected", dirpath, "actual", lockfile_contents)
continue
}

// Check directory exists before attempting to remove:
if _, err := os.Stat(dirpath); os.IsNotExist(err) {
log.Info("Containerd directory doesn't exist; skipping cleanup", "directory", dirpath)
} else {
if err := os.RemoveAll(dirpath); err != nil {
log.Info("WARN: failed to remove containerd data directory", "directory", dirpath, "error", err)
continue // Avoid removing the lockfile path.
}
}

if err := os.Remove(lockpath); err != nil {
log.Info("WARN: Failed to remove containerd lockfile", "lockfile", lockpath)
}
}
}
22 changes: 17 additions & 5 deletions src/k8s/pkg/k8sd/setup/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,10 @@ func Containerd(snap snap.Snap, extraContainerdConfig map[string]any, extraArgs
return nil
}

func saveSnapContainerdPaths(s snap.Snap) error {
// Write the containerd-related paths to files to properly clean-up on removal.
// ContainerdLockPathsForSnap returns a mapping between the absolute paths of
// the lockfiles within the k8s snap and the absolute paths of the containerd
// directory they lock.
func ContainerdLockPathsForSnap(s snap.Snap) map[string]string {
m := map[string]string{
"containerd-socket-path": s.ContainerdSocketDir(),
"containerd-config-dir": s.ContainerdConfigDir(),
Expand All @@ -176,9 +178,19 @@ func saveSnapContainerdPaths(s snap.Snap) error {
snap.ContainerdBaseDir: s.GetContainerdBaseDir(),
}

for filename, content := range m {
if err := utils.WriteFile(filepath.Join(s.LockFilesDir(), filename), []byte(content), 0o600); err != nil {
return fmt.Errorf("failed to write %s: %w", filename, err)
prefixed := map[string]string{}
for k, v := range m {
prefixed[filepath.Join(s.LockFilesDir(), k)] = v
}

return prefixed
}

// saveSnapContainerdPaths creates the lock files for the containerd directory paths to be used for later cleanup.
func saveSnapContainerdPaths(s snap.Snap) error {
for lockpath, dirpath := range ContainerdLockPathsForSnap(s) {
if err := utils.WriteFile(lockpath, []byte(dirpath), 0o600); err != nil {
return fmt.Errorf("failed to write %s: %w", lockpath, err)
}
}
return nil
Expand Down
12 changes: 10 additions & 2 deletions src/k8s/pkg/k8sd/setup/containerd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func TestContainerd(t *testing.T) {
ContainerdRegistryConfigDir: filepath.Join(dir, "containerd-hosts"),
ContainerdStateDir: filepath.Join(dir, "containerd-state"),
ContainerdExtraConfigDir: filepath.Join(dir, "containerd-confd"),
LockFilesDir: filepath.Join(dir, "lockfiles"),
ServiceArgumentsDir: filepath.Join(dir, "args"),
CNIBinDir: filepath.Join(dir, "opt-cni-bin"),
CNIConfDir: filepath.Join(dir, "cni-netd"),
Expand Down Expand Up @@ -129,9 +130,16 @@ func TestContainerd(t *testing.T) {
"containerd-root-dir": s.ContainerdRootDir(),
"containerd-cni-bin-dir": s.CNIBinDir(),
}
for filename, content := range m {

b, err := os.ReadFile(filepath.Join(s.LockFilesDir(), filename))
// Ensure locks directory exists:
files, err := os.ReadDir(s.LockFilesDir())
g.Expect(err).To(Not(HaveOccurred()))
t.Logf("Lockfiles currently in %q: %v", s.LockFilesDir(), files)

for filename, content := range m {
path := filepath.Join(s.LockFilesDir(), filename)
t.Logf("Checking path: %s", path)
b, err := os.ReadFile(path)
g.Expect(err).To(Not(HaveOccurred()))
g.Expect(string(b)).To(Equal(content))
}
Expand Down
75 changes: 64 additions & 11 deletions tests/integration/tests/test_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

LOG = logging.getLogger(__name__)

KUBE_CONTROLLER_MANAGER_SNAP_PORT = 10257

CONTAINERD_PATHS = [
"/etc/containerd",
"/run/containerd",
Expand All @@ -19,9 +21,22 @@
CNI_PATH = "/opt/cni/bin"


def _assert_paths_not_exist(instance: harness.Instance, paths: List[str]):
paths_which_exist = [
p
for p, exists in util.check_file_paths_exist(instance, paths).items()
if exists
]
if paths_which_exist:
raise AssertionError(
f"Expected the following path(s) to not exist: {paths_which_exist}"
)


@pytest.mark.node_count(1)
@pytest.mark.tags(tags.NIGHTLY)
def test_node_cleanup(instances: List[harness.Instance], tmp_path):
"""Verifies that a `snap remove k8s` will perform proper cleanup."""
instance = instances[0]
util.wait_for_dns(instance)
util.wait_for_network(instance)
Expand All @@ -30,11 +45,7 @@ def test_node_cleanup(instances: List[harness.Instance], tmp_path):

# Check that the containerd-related folders are removed on snap removal.
all_paths = CONTAINERD_PATHS + [CNI_PATH]
process = instance.exec(
["ls", *all_paths], capture_output=True, text=True, check=False
)
for path in all_paths:
assert f"cannot access '{path}': No such file or directory" in process.stderr
_assert_paths_not_exist(instance, all_paths)

util.setup_k8s_snap(instance, tmp_path)
instance.exec(["k8s", "bootstrap"])
Expand Down Expand Up @@ -88,18 +99,60 @@ def test_node_cleanup_new_containerd_path(instances: List[harness.Instance]):
assert (
f"cannot access '{path}': No such file or directory" in process.stderr
)
_assert_paths_not_exist(instance, exp_missing_paths)

# Check that the containerd-related folders are in the new locations.
# If one of them is missing, this should have a non-zero exit code.
instance.exec(["ls", *new_containerd_paths], check=True)

for instance in instances:
# Check that the containerd-related folders are not in the new locations after snap removal.
util.remove_k8s_snap(instance)
process = instance.exec(
["ls", *new_containerd_paths], capture_output=True, text=True, check=False
# Check that the containerd-related folders are not in the new locations after snap removal.
_assert_paths_not_exist(instance, new_containerd_paths)


@pytest.mark.node_count(1)
@pytest.mark.no_setup()
@pytest.mark.tags(tags.NIGHTLY)
def test_containerd_path_cleanup_on_failed_init(
instances: List[harness.Instance], tmp_path
):
"""Tests that a failed `bootstrap` properly cleans up any
containerd-related paths it may have created as part of the
failed `bootstrap`.
It induces a bootstrap failure by pre-binding a required k8s service
port (10257 for the kube-controller-manager) before running `k8s bootstrap`.
NOTE: a failed `join-cluster` will trigger the exact same cleanup
hook, so the test implicitly applies to it as well.
"""
instance = instances[0]
expected_code = 1
expected_message = (
"Encountered error(s) while verifying port availability for Kubernetes "
"services: Port 10257 (needed by: kube-controller-manager) is already in use."
)

with util.open_port(KUBE_CONTROLLER_MANAGER_SNAP_PORT) as _:
util.setup_k8s_snap(instance, tmp_path, config.SNAP, connect_interfaces=False)

proc = instance.exec(
["k8s", "bootstrap"], capture_output=True, text=True, check=False
)
for path in new_containerd_paths:
assert (
f"cannot access '{path}': No such file or directory" in process.stderr

if proc.returncode != expected_code:
raise AssertionError(
f"Expected `k8s bootstrap` to exit with code {expected_code}, "
f"but it exited with {proc.returncode}.\n"
f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}"
)

if expected_message not in proc.stderr:
raise AssertionError(
f"Expected to find port-related warning '{expected_message}' in "
"stderr of the `k8s bootstrap` command.\n"
f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}"
)

_assert_paths_not_exist(instance, CONTAINERD_PATHS)
50 changes: 50 additions & 0 deletions tests/integration/tests/test_util/util.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#
# Copyright 2024 Canonical, Ltd.
#
import contextlib
import ipaddress
import json
import logging
import re
import shlex
import socket
import subprocess
import urllib.request
from datetime import datetime
Expand Down Expand Up @@ -514,3 +516,51 @@ def find_suitable_cidr(parent_cidr: str, excluded_ips: List[str]):

return str(lb_net)
raise RuntimeError("Could not find a suitable CIDR for LoadBalancer services")


@contextlib.contextmanager
def open_port(
port: int,
host: str = "",
address_family: socket.AddressFamily = socket.AF_INET,
socket_kind: socket.SocketKind = socket.SOCK_STREAM,
max_backlogged_connections: int = 0,
):
"""Context manager which opens a socket with the given properties
and binds it to the given port.
Yields the already setup and listening socket object for use.
By default, it will only allow one single active connection
and instantly refuse any new ones. Use the `max_backlogged_connections`
argument if you'd like it to accept more connections as `pending`.
"""
sock = socket.socket(family=address_family, type=socket_kind)
if not host:
host = socket.gethostname()
sock.bind((host, port))
LOG.info(f"Successfully bound new socket on '{host}:{port}'")

try:
sock.listen(max_backlogged_connections)
LOG.info(f"Successfully started listening on '{host}:{port}'")
yield sock
finally:
sock.close()
LOG.info(f"Closed socket on '{host}:{port}'")


def check_file_paths_exist(
instance: harness.Instance, paths: List[str]
) -> Mapping[str, bool]:
"""Returns whether the given path(s) exist within the given harness instance
by checking the output of a single `ls` command containing all of them.
It is recommended to always use absolute paths, as the cwd relative to
which the `ls` will get executed depends on the harness instance.
"""
process = instance.exec(["ls", *paths], capture_output=True, text=True, check=False)
return {
p: not (f"cannot access '{p}': No such file or directory" in process.stderr)
for p in paths
}

0 comments on commit 80cb072

Please sign in to comment.