Skip to content

Commit

Permalink
Add manage_vms script to Git
Browse files Browse the repository at this point in the history
Add existing script dealing with the management of errored, stuck or drained VMs to Git.
  • Loading branch information
kysrpex committed Nov 7, 2023
1 parent 3a33fe8 commit 8e6c4e4
Show file tree
Hide file tree
Showing 3 changed files with 217 additions and 0 deletions.
54 changes: 54 additions & 0 deletions files/clouds.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
$ANSIBLE_VAULT;1.1;AES256

Check warning on line 1 in files/clouds.yaml

View workflow job for this annotation

GitHub Actions / Lint

1:1 [document-start] missing document start "---"
35653765643938363737613233643036373566633739313036346366393862363733346630653732
6131353838333839646330336663353136323766396537300a373033626639393332663939653165
33376130313031333232383364313033656333613638663836623930326537613531663138306536
3361373732333363640a613634633032303437336463316238633030343333393136383334336138
38373337646330326165396336666530653737333538353239313034333830353730366663383163
31633736623166633633396439376336353662643066623839333331343935383332663636646661
35633862303762343863666665663564393530623830376338613231616636316637386539653036
30383136316533363734323334366231623133616162376662326532663538313366373265643536
65333961376237643032346565633834353334343030336463663332663766346262616362373437
63303035386163643337653538356263376464366566633566363333386639633465386139303938
31363135366235323336653834636233653265626266373039396339356533363964656565366231
34323033363562333461333765353161656134363537343639366666653635666262633636313339
63353561643338623365343761623738343961306133316134353165386562323463306139363135
30343936633133613331636664636332643136643637323630326233626464313738643532313039
36613664303961336436316533613236653635666332663330663963393331653237356534643438
35323436396138633133366135616636373832303339313938303533373264396565633931316633
64306463396364353634616466313537623832623435343166346165313233316639363666316236
34353762653162343262623836353032306464376538343030643865626435313830306162386239
66623930653931336238346533323634626265626434626336623264343734333636343330343961
36333362346565623830393361643936646563626565366636383935303138333434343861326263
31353032613034323138623162343737353837666338376664323564666136303939666530363637
35653165633961613863356164373034643566386635623364613438383161653136373634626362
34336266636531366636323363386530653066366364316462633535643765643731396364363438
34383734393164623266323464663738636330356236613964636432656335333933616466353439
64613136343932643038323765623164663061323933626664376139666235636635316139643165
31613662616463666461363866613339386436376137306439653566616337393232313231663231
35663266656435653033646136373032383566393130393637646465363164663761383062353165
66636435633138343738386235636332306236386334343333363662313230303236653032303061
64643531633735643463316138653930363833363936373561396562643238663335656265353633
30623665336266663237646233646430613339653466333064343065613635383538646362336261
32393734666539616531623835353438313432306536616565333264323830383861333932636661
62633231353235623833356234613836653361383466303866633638336538373161333261366332
34313731343638643366373331613238613632383666306433343231313935643262346331633133
62613738363563363338316661316137353066386365306138613938366162353265386336333331
64353533613861366239623463373035373837643265636631663832663537373435333561303630
35383064336561396338303134373334343638316134343439343866613864333766306265393936
61666361373534323762643337616539346462366432333136613935393934616237336562656366
34663866633732356663306666336265636436356632313961313131326235346130316138373963
35653035383634343363623634343536373331636333623635616536616635346135343539613363
31623438643865393731306138396235666538376364633163313732663663323438636664653363
64366565393234383633346634613561663964396261626664316562333134306135303332653266
62626432313637353162313833346531636331303763613130313332396562326163386238663135
65363139316538336335386534346133356335623162643963643461343465616665643762646564
38383363613965623939326266313462353736323764636434656631396261306165623834626333
34383635313062323366356631613761336637373064623963303230336536346332396638626465
64363263636632373136386434393730393961303039306139386530623564386630313134393137
61376366393661636365653434633539346635336437626266646339303130343665356466343865
61303136653431663565353064643530666363356437393433653863383235343332326465373531
37376265383463393738376161316666346263333730643935343664356536383333656164663937
39336464306264656662373761303031646464336366313562323564373334623934646132643563
30346138343961663435336363613464393765316231616130393930646430353935353764633863
30356339376365623637616463323838303164393534343464336334626533386339373165356532
63333865363636396437393935633739316266313532636663303533386138396332
125 changes: 125 additions & 0 deletions files/manage_vms
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/bin/bash
# Description: This script can be used to do the following actions:
# 1. Get stuck VMs names and IP addresses
# 2. Get errored VMs names and IP addresses
# 3. Restart stuck VMs
# 4. Remove errored VMs
# 5. Remove stuck VMs
# 6. Remove drained VMs
# 7. Remove a list of VMs
# 8. Get the list of VMs with a specific flavor

# Cloud name handed to the openstack client through --os-cloud.
OPENSTACK_CLOUD='freiburg_galaxy'
# Command prefix shared by every OpenStack call in this script. It is a plain
# string that callers expand unquoted, so it is split on whitespace on purpose.
OPENSTACK_CMD="/opt/galaxy/venv/bin/openstack --os-cloud=${OPENSTACK_CLOUD}"

# Print the names of the nodes that the monitoring script reports with
# 'bwcloud=True,htcondor=False' (presumably VMs that exist in the cloud but
# never joined HTCondor).
# Globals:  VGCN_MONITORING_CMD (read, optional) - command string that replaces
#           the default monitoring invocation; split on whitespace.
# Outputs:  one line on stdout with the names separated (and followed) by a
#           single space; an empty line when nothing matches.
get_list_of_stuck_vms() {
  local monitoring_cmd="${VGCN_MONITORING_CMD:-/opt/galaxy/venv/bin/python /usr/local/bin/vgcn_monitoring.py}"
  local non_htcondor_node_names

  # The name is the part after the first '=' of the first whitespace-separated
  # field of each matching line.
  # shellcheck disable=SC2086 # the command string is split on purpose
  non_htcondor_node_names="$(
    $monitoring_cmd \
      | grep 'bwcloud=True,htcondor=False' \
      | awk '{print $1}' \
      | cut -d '=' -f2 \
      | tr '\n' ' '
  )"

  # printf instead of 'echo -e': the names are data, backslashes must survive.
  printf '%s\n' "$non_htcondor_node_names"
}

# Print the names of all servers whose OpenStack status is ERROR.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Outputs:  one server name per line on stdout; an empty line when there are none.
get_list_of_errored_vms() {
  local errored_vms

  # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
  errored_vms="$(
    $OPENSTACK_CMD server list --status='ERROR' -f json -c Name \
      | jq -r '.[] | "\(.Name)"'
  )"

  # printf instead of 'echo -e': the names are data, backslashes must survive.
  printf '%s\n' "$errored_vms"
}

# Print the host names of the HTCondor slots whose status line contains
# 'Drained'.
# The slot prefix ("slot...@") and the ".novalocal" suffix are stripped from
# the Name attribute reported by condor_status.
# Outputs:  one host name per line on stdout, each host only once even when
#           several of its slots are drained; an empty line when there are none.
get_list_of_drained_vms() {
  local drained_vms

  drained_vms="$(
    condor_status -any -af Name State Activity \
      | grep 'Drained' \
      | awk '{print $1}' \
      | sed -e 's/slot.*@//' -e 's/\.novalocal//' \
      | awk '!seen[$0]++'
  )"

  # printf instead of 'echo -e': the names are data, backslashes must survive.
  printf '%s\n' "$drained_vms"
}

# Print "<ip> <name>" for every stuck VM that is also a 'vgcnbwc-worker-*'
# server in OpenStack.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Calls:    get_list_of_stuck_vms for the whitespace-separated list of names.
# Outputs:  one "<first bioinf address> <server name>" line per match on stdout.
get_stuck_vms() {
  local workers stuck_names name
  local -a names=()

  # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
  workers="$(
    $OPENSTACK_CMD server list --name 'vgcnbwc-worker-*' -c Networks -c Name -f json \
      | jq -r '.[] | "\(.Networks.bioinf[0]) \(.Name)"'
  )"
  stuck_names="$(get_list_of_stuck_vms)"

  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$stuck_names" || true

  for name in "${names[@]}"; do
    # Compare the name field exactly: a regex/substring match would also
    # report e.g. "vm-10" when looking for "vm-1".
    printf '%s\n' "$workers" | vm_name="$name" awk '$2 == ENVIRON["vm_name"]'
  done
}

# Print "<ip> <name>" for every errored VM that is also a 'vgcnbwc-worker-*'
# server in OpenStack.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Calls:    get_list_of_errored_vms for the list of names (one per line).
# Outputs:  one "<first bioinf address> <server name>" line per match on stdout.
get_errored_vms() {
  local workers errored_names name
  local -a names=()

  # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
  workers="$(
    $OPENSTACK_CMD server list --name 'vgcnbwc-worker-*' -c Networks -c Name -f json \
      | jq -r '.[] | "\(.Networks.bioinf[0]) \(.Name)"'
  )"
  errored_names="$(get_list_of_errored_vms)"

  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$errored_names" || true

  for name in "${names[@]}"; do
    # Compare the name field exactly: a regex/substring match would also
    # report e.g. "vm-10" when looking for "vm-1".
    printf '%s\n' "$workers" | vm_name="$name" awk '$2 == ENVIRON["vm_name"]'
  done
}

# Print "<ip> <name>" for every server that uses the given flavor.
# Globals:    OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Arguments:  $1 - flavor name or ID, passed to 'server list --flavor'.
# Outputs:    one "<first bioinf address> <server name>" line per server on stdout.
# Returns:    1 when no flavor is given, otherwise the status of the pipeline.
get_flavored_vms() {
  local flavor="${1:-}"

  # Without a value, '--flavor' would swallow the following '-c' option.
  if [[ -z "$flavor" ]]; then
    printf '%s\n' 'get_flavored_vms: missing flavor argument' >&2
    return 1
  fi

  # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
  $OPENSTACK_CMD server list --flavor "$flavor" -c Networks -c Name -f json \
    | jq -r '.[] | "\(.Networks.bioinf[0]) \(.Name)"'
}

# Hard-reboot every VM reported by get_list_of_stuck_vms.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Outputs:  a banner line per host, followed by whatever openstack prints.
# Returns:  0 when every reboot request succeeded, 1 when at least one failed
#           (the remaining hosts are still processed).
hard_restart_stuck_vms() {
  local stuck_names name status=0
  local -a names=()

  stuck_names="$(get_list_of_stuck_vms)"
  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$stuck_names" || true

  for name in "${names[@]}"; do
    echo "===>Hard rebooting host: $name<==="
    # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
    $OPENSTACK_CMD server reboot "$name" --hard || status=1
  done

  return "$status"
}

# Delete every VM reported by get_list_of_errored_vms.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Outputs:  a banner line per host, followed by whatever openstack prints.
# Returns:  0 when every delete request succeeded, 1 when at least one failed
#           (the remaining hosts are still processed).
remove_errored_vms() {
  local errored_names name status=0
  local -a names=()

  errored_names="$(get_list_of_errored_vms)"
  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$errored_names" || true

  for name in "${names[@]}"; do
    echo "===>Deleting errored host: $name<==="
    # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
    $OPENSTACK_CMD server delete "$name" || status=1
  done

  return "$status"
}

# Delete every VM reported by get_list_of_stuck_vms.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Outputs:  a banner line per host, followed by whatever openstack prints.
# Returns:  0 when every delete request succeeded, 1 when at least one failed
#           (the remaining hosts are still processed).
remove_stuck_vms() {
  local stuck_names name status=0
  local -a names=()

  stuck_names="$(get_list_of_stuck_vms)"
  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$stuck_names" || true

  for name in "${names[@]}"; do
    echo "===>Deleting stuck host: $name<==="
    # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
    $OPENSTACK_CMD server delete "$name" || status=1
  done

  return "$status"
}

# Delete every VM reported by get_list_of_drained_vms.
# Globals:  OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Outputs:  a banner line per host, followed by whatever openstack prints.
# Returns:  0 when every delete request succeeded, 1 when at least one failed
#           (the remaining hosts are still processed).
remove_drained_vms() {
  local drained_names name status=0
  local -a names=()

  drained_names="$(get_list_of_drained_vms)"
  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$drained_names" || true

  for name in "${names[@]}"; do
    echo "===>Deleting drained host: $name<==="
    # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
    $OPENSTACK_CMD server delete "$name" || status=1
  done

  return "$status"
}

# Delete the given VMs.
# Globals:    OPENSTACK_CMD (read) - openstack command string, split on whitespace.
# Arguments:  VM names, either as one whitespace-separated string ("vm1 vm2")
#             or as separate arguments (vm1 vm2); both forms may be mixed.
# Outputs:    a banner line per host, followed by whatever openstack prints.
# Returns:    0 when every delete request succeeded, 1 when at least one failed
#             (the remaining hosts are still processed).
remove_list_of_vms() {
  local name status=0
  local -a names=()

  # Split on whitespace without glob expansion; read returns non-zero at end of
  # input even though the array was filled, hence '|| true'.
  IFS=$' \t\n' read -r -d '' -a names <<<"$*" || true

  for name in "${names[@]}"; do
    echo "===>Deleting host: $name<==="
    # shellcheck disable=SC2086 # OPENSTACK_CMD is a command string, split on purpose
    $OPENSTACK_CMD server delete "$name" || status=1
  done

  return "$status"
}

# Print the command line synopsis, covering every supported option, to stderr.
usage() {
  printf 'Usage: %s [%s]\n' "$0" \
    '--get-stuck-vms|--get-errored-vms|--get-flavored-vms FLAVOR|--hard-restart-stuck-vms|--remove-errored-vms|--remove-stuck-vms|--remove-drained-vms|--remove-list-of-vms "VM1 VM2 ..."' >&2
}

# Parse command line argument and execute the appropriate function
# Arguments:  $1 - the action option; $2... - its value, where one is needed.
# Returns:    1 on a missing/invalid option or a missing option value,
#             otherwise the status of the selected function.
main() {
  if [[ $# -eq 0 ]]; then
    usage
    return 1
  fi

  case "$1" in
    --get-stuck-vms)
      get_stuck_vms
      ;;
    --get-errored-vms)
      get_errored_vms
      ;;
    --get-flavored-vms)
      # An empty flavor would make 'openstack --flavor' swallow the next option.
      if [[ -z "${2:-}" ]]; then
        echo "Option $1 requires a flavor" >&2
        usage
        return 1
      fi
      get_flavored_vms "$2"
      ;;
    --hard-restart-stuck-vms)
      hard_restart_stuck_vms
      ;;
    --remove-errored-vms)
      remove_errored_vms
      ;;
    --remove-stuck-vms)
      remove_stuck_vms
      ;;
    --remove-drained-vms)
      remove_drained_vms
      ;;
    --remove-list-of-vms)
      if [[ $# -lt 2 ]]; then
        echo "Option $1 requires a list of VM names" >&2
        usage
        return 1
      fi
      # Forward everything after the option; the first of these arguments is
      # the (possibly space-separated) list the function has always received.
      remove_list_of_vms "${@:2}"
      ;;
    *)
      echo "Invalid option: $1" >&2
      usage
      return 1
      ;;
  esac
}

# Last command of the script: its status becomes the exit status.
main "$@"
38 changes: 38 additions & 0 deletions maintenance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,44 @@
mode: '0644'
notify:
- restart rsyslog
# Place clouds.yaml under root's configuration directory, in an "openstack"
# sub-directory that only root can access.
- name: Configure OpenStack credentials.
become: true
block:
# `set -u` makes the subshell fail when XDG_CONFIG_HOME is unset (its error
# message is discarded), so the `||` fallback prints $HOME/.config instead.
# NOTE(review): a set-but-empty XDG_CONFIG_HOME yields an empty stdout, which
# the following tasks would use as the path - confirm this cannot happen.
- name: Get $XDG_CONFIG_HOME for root.
ansible.builtin.shell:
executable: /bin/bash
cmd: "set -u; (echo $XDG_CONFIG_HOME) 2> /dev/null || echo $HOME/.config"
# Read-only probe: never report this task as changed.
changed_when: false
register: root_config
# No mode is given for this directory, unlike the openstack directory below.
- name: Ensure $XDG_CONFIG_HOME exists.
become: true
ansible.builtin.file:
path: "{{ root_config.stdout }}"
state: directory
owner: root
group: root
# 0700: only root may enter the directory that holds the credentials.
- name: Ensure OpenStack configuration directory exists.
become: true
ansible.builtin.file:
path: "{{ root_config.stdout }}/openstack"
state: directory
owner: root
group: root
mode: "0700"
# 0600: the credentials file is readable and writable by root only.
- name: Copy OpenStack credentials.
ansible.builtin.copy:
src: clouds.yaml
dest: "{{ root_config.stdout }}/openstack/clouds.yaml"
owner: root
group: root
mode: "0600"
# Install the manage_vms script as /usr/local/bin/manage_vms, owned by root.
# Mode 0555: readable and executable by everyone, writable by nobody.
- name: Copy script to manage VMs.
ansible.builtin.copy:
src: manage_vms
dest: /usr/local/bin/manage_vms
owner: root
group: root
mode: "0555"
roles:
- usegalaxy_eu.handy.os_setup
- geerlingguy.repo-epel
Expand Down

0 comments on commit 8e6c4e4

Please sign in to comment.