diff --git a/t/Makefile.am b/t/Makefile.am
index 6e90f8364444..1997d3ab007a 100644
--- a/t/Makefile.am
+++ b/t/Makefile.am
@@ -354,6 +354,7 @@ dist_check_SCRIPTS = \
 	issues/t5105-signal-propagation.sh \
 	issues/t5308-kvsdir-initial-path.py \
 	issues/t5368-kvs-commit-clear.py \
+	issues/t5518-job-validator-hang.sh \
 	python/__init__.py \
 	python/subflux.py \
 	python/tap \
diff --git a/t/issues/t5518-job-validator-hang.sh b/t/issues/t5518-job-validator-hang.sh
new file mode 100755
index 000000000000..c8dba3084b87
--- /dev/null
+++ b/t/issues/t5518-job-validator-hang.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+#
+# Run an instance with a very small job-ingest working buffer size
+# and ensure the worker does not hang after errors are returned.
+#
+# On first invocation the script re-executes itself under `flux start -s1`.
+#
+# NOTE(review): presumably required so `flux proxy` can resolve the jobid
+# of the nested instance to a local URI -- confirm.
+export FLUX_URI_RESOLVE_LOCAL=t
+
+# Check if we need to start parent job, if so, reexec under flux-start.
+# VALIDATOR_HANG_TEST_ACTIVE marks the re-executed copy so that it does
+# not re-launch itself again.
+if test "$VALIDATOR_HANG_TEST_ACTIVE" != "t"; then
+    export VALIDATOR_HANG_TEST_ACTIVE=t
+    printf "Re-launching test script under flux-start\n"
+    exec flux start -s1 "$0"
+fi
+
+# Launch a single core alloc job in the background (--bg) with
+# ingest.buffer-size=8k. Every later step needs the jobid, so stop here
+# if the alloc fails or prints nothing.
+id=$(flux alloc -n1 --bg --conf=ingest.buffer-size=8k) || exit 1
+test -n "$id" || exit 1
+printf "Launched single core alloc job %s\n" "$id"
+
+# Submission of more than 1 job should have some failures, but should not
+# hang:
+flux proxy "$id" flux submit --cc=1-10 --watch hostname
+rc=$?
+printf "submission of multiple jobs got rc=%s\n" "$rc"
+test "$rc" -ne 0 || exit 1
+
+# Small job to clear errors (its exit status is not checked).
+# '-*' is quoted so the shell cannot glob-expand it before flux sees it.
+flux proxy "$id" flux run --env='-*' --env=PATH hostname
+
+# Another small job should succeed:
+flux proxy "$id" flux run --env='-*' --env=PATH hostname || exit 1
+printf "submission of single job still works\n"