-
Notifications
You must be signed in to change notification settings - Fork 99
/
Copy pathpower_azure
executable file
·114 lines (96 loc) · 2.93 KB
/
power_azure
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env bash
# Slurm utility: Handle Azure cloud nodes
# Author: Ole.H.Nielsen@fysik.dtu.dk
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/
# Use with ResumeProgram and SuspendProgram in slurm.conf
# NOTE: The slurmctld will execute this script as user "slurm"
# see https://slurm.schedmd.com/power_save.html
# Prerequisites for Azure nodes:
# 1. An Azure subscription to create cloud nodes
# 2. azure-cli RPM package
# See https://wiki.fysik.dtu.dk/niflheim/Slurm_cloud_bursting#azure-cli
# Log file for the Azure suspend/resume actions.
# N.B.: The SlurmUser must be able to write to this file.
LOGFILE=/var/log/slurm/power_azure.log
export LOGFILE
# Recommended: use the Azure CLI command az to log the slurm user in to an Azure subscription.
# Otherwise you must insert suitable sudo commands in front of the Azure CLI commands below.
# Print the command synopsis and the list of options on stdout.
# Takes no arguments; $0 (the script name) is interpolated into the first line.
usage() {
  printf '%s\n' \
    "Usage: $0 [-r|-s|-h]" \
    "where:" \
    "-r: Resume (start) cloud nodes" \
    "-s: Suspend (stop/deallocate) cloud nodes" \
    "-h: Print this help information"
}
# Parse the command line options.
#   -r  sets action="start"       (resume nodes)
#   -s  sets action="deallocate"  (suspend nodes)
#   -h  prints the usage text and exits successfully
# Any other option, or no -r/-s at all, prints the usage text on stderr
# and exits with status 1.  If both -r and -s are given, the last one wins.
# After parsing, the positional parameters hold only the node arguments.
export action=""
while getopts "rsh" options; do
  case "$options" in
    r)
      export action="start"
      ;;
    s)
      export action="deallocate"
      ;;
    h)
      # Help was explicitly requested, so this is not an error
      usage
      exit 0
      ;;
    *)
      # getopts sets $options to "?" for an unknown option and has
      # already printed its own diagnostic on stderr
      usage >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

# One of -r or -s is mandatory
if [[ -z "$action" ]]; then
  usage >&2
  exit 1
fi
# Prepare the log file: create it if missing, hand it to the slurm user and
# set mode 644 (owner read/write, everybody else read-only).
touch "$LOGFILE"
chown slurm: "$LOGFILE"
chmod 644 "$LOGFILE"

# From here on, append both stdout and stderr to $LOGFILE.
# If the redirection fails, say so on the inherited stderr instead of
# continuing silently.
exec &>> "$LOGFILE" \
  || echo "WARNING: Cannot append to $LOGFILE, output is not logged there" >&2

echo "================================================="
echo "$(date +'%b %d %T') Invoked $0"
# Display $action in UPPER case (see the bash man-page under Case modification)
echo "${action^^} the Azure nodes $*"

# The environment variables set by slurmctld do not include USER etc.
USER=$(whoami)
export USER
echo "The invoking user is $USER: $(id "$USER")"

# The Azure CLI must be installed at its expected location
if [[ ! -x /usr/bin/az ]]; then
  echo "ERROR: The azure-cli RPM is not installed"
  rpm -q azure-cli
  # Exit statuses are limited to 0-255, so use a plain failure code
  # instead of -1
  exit 1
fi

echo Check that the invoking user is logged in to an Azure account
# An empty reply from "az account show" is treated as "not logged in"
azure_account=$(az account show --query '{tenantId:tenantId,subscriptionid:id}')
if [[ -z "$azure_account" ]]; then
  echo "ERROR: Invoking user $USER is not logged in to Azure CLI"
  echo "See https://docs.microsoft.com/en-us/cli/azure/authenticate-azure-cli"
  exit 1
fi
# Translate the Slurm node arguments into Azure VM resource IDs and run
# "az vm $action" on all of them in one call.
# Inputs:  $action (verb passed to "az vm"), "$@" (Slurm node arguments)
# Exit status: 1 on a setup failure, otherwise the status of "az vm $action"
#              (0 when no matching VM was found).

# Without node arguments there is nothing to do.  This must be refused
# explicitly: an empty node name would otherwise select every VM in the list.
if (( $# == 0 )); then
  echo "ERROR: No Slurm nodes were specified"
  exit 1
fi

# Temporary files: the VM inventory (name, id) and the selected VM IDs
if ! VM_LIST=$(mktemp); then
  echo "ERROR: Cannot create temporary file"
  exit 1
fi
if ! VM_LIST_IDS=$(mktemp); then
  echo "ERROR: Cannot create temporary file"
  rm -f -- "$VM_LIST"
  exit 1
fi
export VM_LIST VM_LIST_IDS
# Cleanup on every exit path, including the error exits below
trap 'rm -f -- "$VM_LIST" "$VM_LIST_IDS"' EXIT

# List VM nodes (name, id), tab-separated, 1 line per VM
if ! az vm list --query "[].{VMName:name, objectID:id}" -o tsv > "$VM_LIST"; then
  echo "ERROR: Cannot list the Azure VMs"
  exit 1
fi

# Expand the node arguments into individual host names, 1 array element per node
mapfile -t node_names < <(scontrol show hostname "$@")

# Collect the ID of every VM whose name is exactly equal to a node name.
# A string comparison is used (not a regex/prefix match) so that "node1"
# does not also select "node10".
for node in "${node_names[@]}"; do
  [[ -n "$node" ]] || continue
  awk -F '\t' -v name="$node" '$1 == name && $2 != "" { print $2 }' "$VM_LIST"
done > "$VM_LIST_IDS"

# Handle multiple Azure VMs in one command
# See https://build5nines.com/azure-cli-2-0-quickly-start-stop-all-vms/
rc=0
if [[ -s "$VM_LIST_IDS" ]]; then
  echo "Handle VM $action on list:"
  cat "$VM_LIST_IDS"
  mapfile -t vm_ids < "$VM_LIST_IDS"
  az vm "$action" --ids "${vm_ids[@]}" || rc=$?
  if (( rc != 0 )); then
    echo "ERROR: az vm $action failed with exit status $rc"
  fi
  # Check if nodes are running and slurmd started
  echo "Check Slurm nodes:"
  scontrol show node "$@" | grep Node
else
  echo "No VMs found"
fi

# The temporary files are removed by the EXIT trap
exit "$rc"