Skip to content

Commit

Permalink
Add function tests for maintenance mode
Browse files Browse the repository at this point in the history
Signed-off-by: Chi Wai Chan <[email protected]>
  • Loading branch information
chanchiwai-ray committed Jan 3, 2025
1 parent 81663ad commit 6d1aa77
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 7 deletions.
68 changes: 67 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ jobs:
set +e
sudo microceph log set-level warning
output=$(sudo microceph log get-level)
if [[ "$output" != "3" ]] ; then echo "incorrect log level: $output"; exit 1; fi
if [[ "$output" != "3" ]] ; then echo "incorrect log level: $output"; exit 1; fi
- name: Print logs for failure
if: failure()
Expand Down Expand Up @@ -434,6 +434,72 @@ jobs:
run: |
sudo snap logs microceph -n 1000
# Functional test job for `microceph cluster maintenance enter/exit`.
# Every step delegates to a function in tests/scripts/actionutils.sh, which is
# copied to $HOME so it can be invoked as ~/actionutils.sh <function> [args].
test-maintenance-modes:
  name: Test maintenance mode
  runs-on: ubuntu-22.04
  needs: build-microceph
  steps:
    - name: Download snap
      uses: actions/download-artifact@v3
      with:
        name: snaps
        path: /home/runner

    - name: Checkout code
      uses: actions/checkout@v3
      with:
        fetch-depth: 0

    - name: Copy utils
      run: cp tests/scripts/actionutils.sh $HOME

    - name: Clear FORWARD firewall rules
      run: ~/actionutils.sh cleaript

    - name: Free disk
      run: ~/actionutils.sh free_runner_disk

    - name: Install dependencies
      run: ~/actionutils.sh setup_lxd

    - name: Create 4 containers with loopback devices
      run: ~/actionutils.sh create_containers internal

    - name: Install local microceph snap
      run: ~/actionutils.sh install_multinode

    - name: Bootstrap
      run: ~/actionutils.sh bootstrap_head internal

    - name: Setup cluster
      run: ~/actionutils.sh cluster_nodes internal

    - name: Enable one extra ceph mon for redundancy
      run: ~/actionutils.sh nodeexec node-wrk3 enable_mon

    - name: Add and wait for OSDs
      run: |
        set -uex
        for i in 0 1 2 3 ; do
          ~/actionutils.sh add_osd_to_node "node-wrk$i"
        done
        ~/actionutils.sh headexec wait_for_osds 4

    # The maintenance tests below all target node-wrk1.
    - name: Test dry run maintenance enter
      run: ~/actionutils.sh test_dry_run_maintenance_enter node-wrk1

    - name: Test dry run maintenance exit
      run: ~/actionutils.sh test_dry_run_maintenance_exit node-wrk1

    - name: Test can always exit maintenance mode
      run: ~/actionutils.sh test_maintenance_exit node-wrk1

    - name: Test enter maintenance mode without set noout and stop osd
      run: ~/actionutils.sh test_maintenance_enter node-wrk1

    - name: Test enter maintenance mode with set noout and stop osd
      run: ~/actionutils.sh test_maintenance_enter_set_noout_stop_osds node-wrk1

loop-file-tests:
name: Test with loopback file OSDs
runs-on: ubuntu-22.04
Expand Down
131 changes: 125 additions & 6 deletions tests/scripts/actionutils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ function enable_rgw() {
wait_for_rgw 1
}

# Run `microceph enable mon` on the machine executing this script.
# Usage: enable_mon
#
# The function used to be defined twice in a row with identical bodies; a
# single definition is kept so later edits cannot touch only the shadowed copy.
function enable_mon() {
    set -x
    sudo microceph enable mon
}

function enable_rgw_ssl() {
set -x
# Generate the SSL material
Expand Down Expand Up @@ -146,7 +156,7 @@ function create_containers() {
lxc config set $container security.privileged true
lxc config set $container security.nesting true
# Allow access to loopback devices
printf 'lxc.cgroup2.devices.allow = b 7:* rwm\nlxc.cgroup2.devices.allow = c 10:237 rwm' | lxc config set $container raw.lxc -
printf 'lxc.cgroup2.devices.allow = b 7:* rwm\nlxc.cgroup2.devices.allow = c 10:237 rwm' | lxc config set $container raw.lxc -

# Configure and start container
lxc config device add $container homedir disk source=${HOME} path=/mnt
Expand Down Expand Up @@ -181,7 +191,7 @@ function remote_simple_bootstrap_two_sites() {
tok=$(lxc exec node-wrk0 -- sh -c "microceph cluster add node-wrk1" )
lxc exec node-wrk1 -- sh -c "microceph cluster join $tok"
sleep 10
# Bootstrap siteb
# Bootstrap siteb
lxc exec node-wrk2 -- sh -c "microceph cluster bootstrap"
lxc exec node-wrk2 -- sh -c "microceph disk add loop,2G,3"
tok=$(lxc exec node-wrk2 -- sh -c "microceph cluster add node-wrk3" )
Expand Down Expand Up @@ -639,7 +649,7 @@ function wait_for_osds() {
if [[ $res -lt $expect ]] ; then
echo "Never reached ${expect} OSDs"
return -1
fi
fi
}

function wait_for_rgw() {
Expand Down Expand Up @@ -784,12 +794,12 @@ EOF
}

# nodeexec <node name> <run>
# Run one actionutils.sh function inside the given LXD container.
#   $1    - container name to execute in
#   $2    - name of the actionutils.sh function to run
#   $3... - optional arguments forwarded to that function
# The script is invoked as /mnt/actionutils.sh inside the container.
# Returns the exit status reported by `lxc exec`.
function nodeexec() {
    local node="${1?missing}"
    local run="${2?missing}"
    shift 2
    set -x
    # "$*" keeps every forwarded argument inside the single command string
    # given to `sh -c`. With "$@" only the first argument stays in the string;
    # the rest become sh's own $0, $1, ... and never reach the function.
    lxc exec "$node" -- sh -c "/mnt/actionutils.sh $run $*"
}

function headexec() {
Expand Down Expand Up @@ -824,6 +834,115 @@ function bombard_rgw_configs() {
sudo microceph.ceph health
}


# Check if the cluster has osd noout flag set.
# Usage: is_osd_noout_set
#
# Reads `.flags_set` from `microceph.ceph osd dump -f json` and looks for the
# entry "noout".
# NOTE: this function terminates the calling shell with `exit` (0 when noout
# is set, 1 otherwise, including when the query yields no output), so run it
# as a dispatched script command or inside a subshell.
function is_osd_noout_set() {
    local osd_noout
    osd_noout=$(sudo microceph.ceph osd dump -f json | jq -r '.flags_set | any(. == "noout")')

    set -x

    # Quoted comparison: an empty result is simply "not set" instead of
    # triggering a `[: ==: unary operator expected` error.
    if [[ "$osd_noout" == "true" ]]; then
        exit 0
    fi
    exit 1
}

# Check if the snap service is active and enabled
# Usage: check_snap_service_active_enabled <microceph service name>
#
# Succeeds only when the `snap services microceph.<name>` listing contains
# both the word "active" and the word "enabled"; returns non-zero otherwise.
function check_snap_service_active_enabled() {
    local service="${1?missing}"
    local listing

    set -xe

    listing=$(snap services "microceph.${service}")

    # -w matches whole words only: a plain `grep active` also matches
    # "inactive", which would report a stopped service as running.
    if ! grep -qw active <<<"$listing"; then
        return 1
    fi
    if ! grep -qw enabled <<<"$listing"; then
        return 1
    fi
}

# Test dry run `microceph cluster maintenance enter` prints expected number of steps.
# Usage: test_dry_run_maintenance_enter <node name>
#
# Runs the dry-run inside the given container once per flag combination and
# compares the number of printed lines with the expected step count.
# Exits the shell with status 1 on the first mismatch.
function test_dry_run_maintenance_enter() {
    local node="${1?missing}"
    local scenario expected flags lines
    # "<expected line count>|<flags passed to maintenance enter>"
    local -a scenarios=(
        "3|--set-noout=false --stop-osds=false"
        "4|--set-noout=false --stop-osds=true"
        "5|--set-noout=true --stop-osds=false"
        "6|--set-noout=true --stop-osds=true"
    )

    set -xe

    for scenario in "${scenarios[@]}"; do
        expected="${scenario%%|*}"
        flags="${scenario#*|}"

        lines=$(lxc exec "$node" -- sh -c "sudo microceph cluster maintenance enter $node $flags --dry-run | wc -l")

        # Quoted comparison: with an unquoted, empty $lines the old
        # `[ $lines != "3" ]` raised an operator error that `if` treated as
        # "no mismatch", letting the test pass without checking anything.
        if [[ "$lines" != "$expected" ]]; then
            echo "maintenance enter dry run ($flags): expected $expected lines, got '$lines'" >&2
            exit 1
        fi
    done
}

# Test dry run `microceph cluster maintenance exit` prints expected number of steps.
# Usage: test_dry_run_maintenance_exit <node name>
#
# Runs the dry-run inside the given container and expects exactly 4 lines of
# output. Exits the shell with status 1 on a mismatch.
function test_dry_run_maintenance_exit() {
    local node="${1?missing}"
    local expected="4"
    local lines

    set -xe

    # Count expected steps
    lines=$(lxc exec "$node" -- sh -c "sudo microceph cluster maintenance exit $node --dry-run | wc -l")

    # Quoted comparison so that empty output counts as a failure instead of
    # an operator error that `if` silently treats as "no mismatch".
    if [[ "$lines" != "$expected" ]]; then
        echo "maintenance exit dry run: expected $expected lines, got '$lines'" >&2
        exit 1
    fi
}

# Test that `microceph cluster maintenance exit` can always be run: afterwards
# the osd noout flag must be unset and the osd service active and enabled.
# Usage: test_maintenance_exit <node name>
#
# Exits the shell with a non-zero status when an assertion fails.
function test_maintenance_exit() {
    local node="${1?missing}"

    set -xe

    lxc exec "$node" -- sh -c "sudo microceph cluster maintenance exit $node"

    # assert noout is unset
    # The exit status is tested directly. The previous
    # `[ ! $(nodeexec ...) ]` looked at captured stdout, which is empty, so
    # it reduced to `[ ! ]` and was always true.
    if nodeexec "$node" is_osd_noout_set; then
        echo "osd noout is still set after maintenance exit on $node" >&2
        exit 1
    fi

    # assert osd service is active and enabled
    nodeexec "$node" check_snap_service_active_enabled osd
}

# Test `microceph cluster maintenance enter --set-noout=false --stop-osds=false`.
# Usage: test_maintenance_enter <node name>
#
# After entering with both options disabled, and again after exiting, the
# osd noout flag must be unset and the osd service active and enabled.
# Exits the shell with a non-zero status when an assertion fails.
function test_maintenance_enter() {
    local node="${1?missing}"
    local phase

    set -xe

    for phase in enter exit; do
        if [[ "$phase" == "enter" ]]; then
            lxc exec "$node" -- sh -c "sudo microceph cluster maintenance enter --set-noout=false --stop-osds=false $node"
        else
            lxc exec "$node" -- sh -c "sudo microceph cluster maintenance exit $node"
        fi

        # assert noout is unset
        # The exit status is tested directly. The previous
        # `[ ! $(nodeexec ...) ]` looked at captured stdout, which is empty,
        # so it was always true.
        if nodeexec "$node" is_osd_noout_set; then
            echo "osd noout is set after maintenance $phase on $node" >&2
            exit 1
        fi

        # assert osd service is active and enabled
        nodeexec "$node" check_snap_service_active_enabled osd
    done
}

# Test `microceph cluster maintenance enter --set-noout=true --stop-osds=true`.
# Usage: test_maintenance_enter_set_noout_stop_osds <node name>
#
# After entering: osd noout must be set and the osd service must NOT report
# active+enabled. After exiting: osd noout must be unset and the osd service
# must be active and enabled again.
# Exits the shell with a non-zero status when an assertion fails.
function test_maintenance_enter_set_noout_stop_osds() {
    local node="${1?missing}"

    set -xe

    # Enter
    lxc exec "$node" -- sh -c "sudo microceph cluster maintenance enter --set-noout=true --stop-osds=true $node"

    # assert noout is set
    nodeexec "$node" is_osd_noout_set

    # assert osd service is not active and not enabled
    # Negative assertions test the exit status directly. The previous
    # `[ ! $(nodeexec ...) ]` looked at captured stdout, which is empty, so
    # it was always true.
    if nodeexec "$node" check_snap_service_active_enabled osd; then
        echo "osd service is still active and enabled in maintenance mode on $node" >&2
        exit 1
    fi

    # Exit
    lxc exec "$node" -- sh -c "sudo microceph cluster maintenance exit $node"

    # assert noout is unset
    if nodeexec "$node" is_osd_noout_set; then
        echo "osd noout is still set after maintenance exit on $node" >&2
        exit 1
    fi

    # assert osd service is active and enabled
    nodeexec "$node" check_snap_service_active_enabled osd
}

run="${1}"
shift

Expand Down

0 comments on commit 6d1aa77

Please sign in to comment.