Skip to content

Commit

Permalink
t: add tests for enforcing queue limits
Browse files Browse the repository at this point in the history
Problem: There are no tests that check enforcement of the max running
jobs limit in a queue for an association.

Add some tests.
  • Loading branch information
cmoussa1 committed Sep 30, 2024
1 parent f7acb1b commit a5f220f
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 0 deletions.
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ TESTSCRIPTS = \
t1038-hierarchy-small-tie-all-db.t \
t1039-issue476.t \
t1040-mf-priority-projects.t \
t1041-mf-priority-queue-limits.t \
t5000-valgrind.t \
python/t1000-example.py \
python/t1001_db.py \
Expand Down
207 changes: 207 additions & 0 deletions t/t1041-mf-priority-queue-limits.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
#!/bin/bash
#
# Sharness test script: exercises the running-jobs limits (per queue and per
# association) enforced by the multi-factor priority (mf_priority) plugin.

test_description='test multi-factor priority plugin queue limits'

# Quote the expansion so the include still resolves when the test directory
# path contains whitespace.
. "$(dirname "$0")/sharness.sh"

# Directory handed to the test instance below via --config-path; the queue
# definitions are written into it later in this script.
mkdir -p conf.d

# Plugin under test, the helper script used to submit jobs under a given
# userid, and the flux-accounting database created by this script.
MULTI_FACTOR_PRIORITY=${FLUX_BUILD_DIR}/src/plugins/.libs/mf_priority.so
SUBMIT_AS=${SHARNESS_TEST_SRCDIR}/scripts/submit_as.py
DB_PATH=$(pwd)/FluxAccountingTest.db

# NOTE(review): "16 job" is presumably the instance size and personality
# understood by test_under_flux, and limited=1 a sched-simple mode setting --
# confirm against the sharness helpers that define them.
export TEST_UNDER_FLUX_SCHED_SIMPLE_MODE="limited=1"
test_under_flux 16 job "-o,--config-path=$(pwd)/conf.d"

flux setattr log-stderr-level 1

# --- Instance and flux-accounting service setup ------------------------------

# Load an [exec.testexec] table with allow-guests = true.
# NOTE(review): presumably required because later jobs are submitted under
# userid 5001 through submit_as.py -- confirm.
test_expect_success 'allow guest access to testexec' '
flux config load <<-EOF
[exec.testexec]
allow-guests = true
EOF
'

# Create the flux-accounting database at ${DB_PATH}.
test_expect_success 'create flux-accounting DB' '
flux account -p ${DB_PATH} create-db
'

# Start the flux-accounting service against that database.
# NOTE(review): -t presumably selects a test mode -- confirm against the
# account-service usage text.
test_expect_success 'start flux-accounting service' '
flux account-service -p ${DB_PATH} -t
'

# Load the plugin under test.
# NOTE(review): -r .priority-default presumably removes the built-in priority
# plugin before loading -- see flux-jobtap(1).
test_expect_success 'load multi-factor priority plugin' '
flux jobtap load -r .priority-default ${MULTI_FACTOR_PRIORITY}
'

# Passes only if grep finds "mf_priority" in the list of loaded jobtap plugins.
test_expect_success 'check that mf_priority plugin is loaded' '
flux jobtap list | grep mf_priority
'

# --- Accounting data: banks, queues, and one association ---------------------

# Create a root bank and a single child bank, bankA, beneath it.
test_expect_success 'add some banks' '
flux account add-bank root 1 &&
flux account add-bank --parent-bank=root bankA 1
'

# Define three queues whose --max-running-jobs values differ:
# bronze=3, silver=2, gold=1.
test_expect_success 'add queues with different running jobs limits' '
flux account add-queue bronze --priority=200 --max-running-jobs=3 &&
flux account add-queue silver --priority=300 --max-running-jobs=2 &&
flux account add-queue gold --priority=400 --max-running-jobs=1
'

# Add user1 (userid 5001) under bankA with access to all three queues. The
# association's own limits (100 running, 100 active) are far above every queue
# limit defined above.
test_expect_success 'add a user' '
flux account add-user \
--username=user1 \
--userid=5001 \
--bank=bankA \
--queues="bronze,silver,gold" \
--max-running-jobs=100 \
--max-active-jobs=100
'

# Push the contents of the database to the plugin.
test_expect_success 'send the user and queue information to the plugin' '
flux account-priority-update -p ${DB_PATH}
'

# Declare the same three queues in conf.d/queues.toml, reload the
# configuration, and start every queue.
test_expect_success 'configure flux with those queues' '
cat >conf.d/queues.toml <<-EOT &&
[queues.bronze]
[queues.silver]
[queues.gold]
EOT
flux config reload &&
flux queue start --all
'

# --- Scenario 1: per-queue running jobs limit --------------------------------
#
# In this set of tests, an association belongs to all three available queues,
# and each queue has a different per-association limit on the number of
# running jobs. The association submits the maximum number of running jobs
# to the silver queue (2 jobs). The third job submitted to the silver queue has
# a dependency specific to the per-queue running jobs limit added to it, but
# jobs submitted to other queues still receive an alloc event.
#
# Once one of the currently running jobs in the silver queue completes and is
# cleaned up, the held job has its dependency removed and receives its alloc
# event.

# Submit two sleep jobs to silver as userid 5001 and wait for both to receive
# an alloc event (-t 5 bounds each wait).
test_expect_success 'submit max number of jobs to silver queue' '
job1=$(flux python ${SUBMIT_AS} 5001 --queue=silver sleep 60) &&
job2=$(flux python ${SUBMIT_AS} 5001 --queue=silver sleep 60) &&
flux job wait-event -vt 5 ${job1} alloc &&
flux job wait-event -vt 5 ${job2} alloc
'

# Dump the plugin state and assert that the first bank entry for userid 5001
# reports queue_usage.silver == 2. jq -e makes the exit status follow the
# boolean result.
test_expect_success 'running jobs count for the queues are incremented once jobs start' '
flux jobtap query mf_priority.so > silver.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].queue_usage.silver == 2" <silver.json
'

# A third silver job must get a dependency-add event whose context description
# is "max-run-jobs-queue".
test_expect_success 'a third job to the silver queue results in a dependency-add' '
job3=$(flux python ${SUBMIT_AS} 5001 --queue=silver sleep 60) &&
flux job wait-event -vt 5 \
--match-context=description="max-run-jobs-queue" \
${job3} dependency-add
'

# While job3 is held, a job submitted to bronze still receives an alloc event.
test_expect_success 'can submit other jobs to other queues in the meantime' '
job4=$(flux python ${SUBMIT_AS} 5001 --queue=bronze sleep 60) &&
flux job wait-event -vt 5 ${job4} alloc
'

# Expected totals at this point: 3 running (job1, job2, job4) and 4 active
# (those three plus the held job3).
test_expect_success 'check overall jobs counts for user' '
flux jobtap query mf_priority.so > user1.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].cur_run_jobs == 3" <user1.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].cur_active_jobs == 4" <user1.json
'

# Cancel one of the two running silver jobs and wait for its clean event.
test_expect_success 'cancel currently running job in silver queue' '
flux cancel ${job1} &&
flux job wait-event -vt 5 ${job1} clean
'

# The held job3 must now receive an alloc event; then cancel the remaining
# silver jobs and wait for both to reach clean.
test_expect_success 'wait for alloc and then cancel second and third jobs' '
flux job wait-event -vt 5 ${job3} alloc &&
flux cancel ${job2} &&
flux cancel ${job3} &&
flux job wait-event -vt 5 ${job2} clean &&
flux job wait-event -vt 5 ${job3} clean
'

# Cancel the bronze job as well so that no jobs from this scenario remain.
test_expect_success 'cancel job in bronze queue' '
flux cancel ${job4} &&
flux job wait-event -vt 5 ${job4} clean
'

# With every job cleaned up, the silver queue usage and the association's
# running and active counters must all be back at 0.
test_expect_success 'running jobs count for the queues are decremented once jobs exit' '
flux jobtap query mf_priority.so > query.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].queue_usage.silver == 0" <query.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].cur_run_jobs == 0" <query.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].cur_active_jobs == 0" <query.json
'

# --- Scenario 2: association-wide running jobs limit -------------------------
#
# In this set of tests, the association has a max running jobs limit that is
# lower than the number of jobs it may run in a given queue. In this case, the
# job receives the more general running jobs limit dependency instead of the
# queue-specific one.

# Lower user1's max running jobs limit to 2 (below bronze's queue limit of 3)
# and push the change to the plugin.
test_expect_success 'edit the max-running-jobs limit of the association' '
flux account edit-user user1 --max-running-jobs=2 &&
flux account-priority-update -p ${DB_PATH}
'

# Fill the association's limit with two bronze jobs; both must receive an
# alloc event. job1 and job2 are reassigned here, replacing the earlier job IDs.
test_expect_success 'submit max running jobs to bronze queue' '
job1=$(flux python ${SUBMIT_AS} 5001 --queue=bronze sleep 60) &&
job2=$(flux python ${SUBMIT_AS} 5001 --queue=bronze sleep 60) &&
flux job wait-event -vt 5 ${job1} alloc &&
flux job wait-event -vt 5 ${job2} alloc
'

# The third job goes to silver, yet it must still be held, with a
# dependency-add event whose context description is
# "max-running-jobs-user-limit".
test_expect_success 'a third submitted job (regardless of queue) results in dependency-add' '
job3=$(flux python ${SUBMIT_AS} 5001 --queue=silver sleep 60) &&
flux job wait-event -vt 5 \
--match-context=description="max-running-jobs-user-limit" \
${job3} dependency-add
'

# Expected plugin state: one entry in held_jobs, 2 running jobs, and 3 active
# jobs (the two running jobs plus the held one).
test_expect_success 'check active/running jobs counts' '
flux jobtap query mf_priority.so > user1.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].held_jobs | length == 1" <user1.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].cur_run_jobs == 2" <user1.json &&
jq -e ".mf_priority_map[] | \
select(.userid == 5001) | \
.banks[0].cur_active_jobs == 3" <user1.json
'

# Cancel one running job and wait for its clean event. The held job3 must then
# have both an alloc event and a dependency-remove event carrying the same
# "max-running-jobs-user-limit" description.
test_expect_success 'cancel currently running job; held job gets alloc event' '
flux cancel ${job1} &&
flux job wait-event -vt 5 ${job1} clean &&
flux job wait-event -vt 5 ${job3} alloc &&
flux job wait-event -vt 5 \
--match-context=description="max-running-jobs-user-limit" \
${job3} dependency-remove
'

# Cancel the two jobs that are still running and wait for each one to reach
# its clean event, as the earlier cancel tests in this script do. Without the
# waits, the steps that follow could run while these jobs are still being
# cleaned up.
test_expect_success 'cancel running jobs' '
flux cancel ${job2} &&
flux cancel ${job3} &&
flux job wait-event -vt 5 ${job2} clean &&
flux job wait-event -vt 5 ${job3} clean
'

# Stop the flux-accounting service by sending the accounting.shutdown_service
# RPC through the Flux Python bindings and waiting for the response (.get()).
test_expect_success 'shut down flux-accounting service' '
flux python -c "import flux; flux.Flux().rpc(\"accounting.shutdown_service\").get()"
'

# Marks the end of the test script for sharness.
test_done

0 comments on commit a5f220f

Please sign in to comment.