diff --git a/files/clouds.yaml b/files/clouds.yaml new file mode 100644 index 000000000..88b2b9a54 --- /dev/null +++ b/files/clouds.yaml @@ -0,0 +1,54 @@ +$ANSIBLE_VAULT;1.1;AES256 +35653765643938363737613233643036373566633739313036346366393862363733346630653732 +6131353838333839646330336663353136323766396537300a373033626639393332663939653165 +33376130313031333232383364313033656333613638663836623930326537613531663138306536 +3361373732333363640a613634633032303437336463316238633030343333393136383334336138 +38373337646330326165396336666530653737333538353239313034333830353730366663383163 +31633736623166633633396439376336353662643066623839333331343935383332663636646661 +35633862303762343863666665663564393530623830376338613231616636316637386539653036 +30383136316533363734323334366231623133616162376662326532663538313366373265643536 +65333961376237643032346565633834353334343030336463663332663766346262616362373437 +63303035386163643337653538356263376464366566633566363333386639633465386139303938 +31363135366235323336653834636233653265626266373039396339356533363964656565366231 +34323033363562333461333765353161656134363537343639366666653635666262633636313339 +63353561643338623365343761623738343961306133316134353165386562323463306139363135 +30343936633133613331636664636332643136643637323630326233626464313738643532313039 +36613664303961336436316533613236653635666332663330663963393331653237356534643438 +35323436396138633133366135616636373832303339313938303533373264396565633931316633 +64306463396364353634616466313537623832623435343166346165313233316639363666316236 +34353762653162343262623836353032306464376538343030643865626435313830306162386239 +66623930653931336238346533323634626265626434626336623264343734333636343330343961 +36333362346565623830393361643936646563626565366636383935303138333434343861326263 +31353032613034323138623162343737353837666338376664323564666136303939666530363637 +35653165633961613863356164373034643566386635623364613438383161653136373634626362 +34336266636531366636323363386530653066366364316462633535643765643731396364363438 +34383734393164623266323464663738636330356236613964636432656335333933616466353439 +64613136343932643038323765623164663061323933626664376139666235636635316139643165 +31613662616463666461363866613339386436376137306439653566616337393232313231663231 +35663266656435653033646136373032383566393130393637646465363164663761383062353165 +66636435633138343738386235636332306236386334343333363662313230303236653032303061 +64643531633735643463316138653930363833363936373561396562643238663335656265353633 +30623665336266663237646233646430613339653466333064343065613635383538646362336261 +32393734666539616531623835353438313432306536616565333264323830383861333932636661 +62633231353235623833356234613836653361383466303866633638336538373161333261366332 +34313731343638643366373331613238613632383666306433343231313935643262346331633133 +62613738363563363338316661316137353066386365306138613938366162353265386336333331 +64353533613861366239623463373035373837643265636631663832663537373435333561303630 +35383064336561396338303134373334343638316134343439343866613864333766306265393936 +61666361373534323762643337616539346462366432333136613935393934616237336562656366 +34663866633732356663306666336265636436356632313961313131326235346130316138373963 +35653035383634343363623634343536373331636333623635616536616635346135343539613363 +31623438643865393731306138396235666538376364633163313732663663323438636664653363 +64366565393234383633346634613561663964396261626664316562333134306135303332653266 +62626432313637353162313833346531636331303763613130313332396562326163386238663135 +65363139316538336335386534346133356335623162643963643461343465616665643762646564 +38383363613965623939326266313462353736323764636434656631396261306165623834626333 +34383635313062323366356631613761336637373064623963303230336536346332396638626465 +64363263636632373136386434393730393961303039306139386530623564386630313134393137 +61376366393661636365653434633539346635336437626266646339303130343665356466343865 +61303136653431663565353064643530666363356437393433653863383235343332326465373531 +37376265383463393738376161316666346263333730643935343664356536383333656164663937 +39336464306264656662373761303031646464336366313562323564373334623934646132643563 +30346138343961663435336363613464393765316231616130393930646430353935353764633863 +30356339376365623637616463323838303164393534343464336334626533386339373165356532 +63333865363636396437393935633739316266313532636663303533386138396332 diff --git a/files/manage_vms b/files/manage_vms new file mode 100644 index 000000000..e670671ee --- /dev/null +++ b/files/manage_vms @@ -0,0 +1,125 @@ +#!/bin/bash +# Description: This script can be used to do the following actions: +# 1. Get stuck VMs names and IP addresses +# 2. Get errored VMs names and IP addresses +# 3. Restart stuck VMs +# 4. Remove errored VMs +# 5. Remove stuck VMs +# 6. Remove drained VMs +# 7. Remove a list of VMs +# 8. Get the list of VMs with a specific flavor + +OPENSTACK_CLOUD="freiburg_galaxy" +OPENSTACK_CMD="/opt/galaxy/venv/bin/openstack --os-cloud=$OPENSTACK_CLOUD" + +get_list_of_stuck_vms(){ + non_htcondor_node_names="$(/opt/galaxy/venv/bin/python /usr/local/bin/vgcn_monitoring.py | grep 'bwcloud=True,htcondor=False' | awk '{print $1}' | cut -d '=' -f2 | tr '\n' ' ')" + echo -e "$non_htcondor_node_names" +} + +get_list_of_errored_vms(){ + errored_vms="$($OPENSTACK_CMD server list --status='ERROR' -f json -c Name | jq -r '.[] | "\(.Name)"')" + echo -e "$errored_vms" +} + +get_list_of_drained_vms(){ + drained_vms="$(condor_status -any -af Name State Activity | grep 'Drained' | awk '{print $1}' | sed -e 's/slot.*@//' -e 's/\.novalocal//')" + echo -e "$drained_vms" +} + +get_stuck_vms(){ + VGCNBWC_WORKERS="$($OPENSTACK_CMD server list --name 'vgcnbwc-worker-*' -c Networks -c Name -f json | jq -r '.[] | "\(.Networks.bioinf[0]) \(.Name)"')" + non_htcondor_node_names=$(get_list_of_stuck_vms) + for i in $non_htcondor_node_names; do + echo -e "$VGCNBWC_WORKERS" | grep "$i" + done +} + +get_errored_vms(){ + VGCNBWC_WORKERS="$($OPENSTACK_CMD server list --name 'vgcnbwc-worker-*' -c Networks -c Name -f json | jq -r '.[] | "\(.Networks.bioinf[0]) \(.Name)"')" + errored_vms=$(get_list_of_errored_vms) + for i in $errored_vms; do + echo -e "$VGCNBWC_WORKERS" | grep "$i" + done +} + +get_flavored_vms(){ + $OPENSTACK_CMD server list --flavor $1 -c Networks -c Name -f json | jq -r '.[] | "\(.Networks.bioinf[0]) \(.Name)"' +} + +hard_restart_stuck_vms(){ + non_htcondor_node_names=$(get_list_of_stuck_vms) + for i in $non_htcondor_node_names; do + echo "===>Hard rebooting host: $i<===" + $OPENSTACK_CMD server reboot $i --hard + done +} + +remove_errored_vms(){ + errored_vms=$(get_list_of_errored_vms) + for i in $errored_vms; do + echo "===>Deleting errored host: $i<===" + $OPENSTACK_CMD server delete $i + done +} + +remove_stuck_vms(){ + non_htcondor_node_names=$(get_list_of_stuck_vms) + for i in $non_htcondor_node_names; do + echo "===>Deleting stuck host: $i<===" + $OPENSTACK_CMD server delete $i + done +} + +remove_drained_vms(){ + drained_vms=$(get_list_of_drained_vms) + for i in $drained_vms; do + echo "===>Deleting drained host: $i<===" + $OPENSTACK_CMD server delete $i + done +} + +remove_list_of_vms(){ + for i in $1; do + echo "===>Deleting host: $i<===" + $OPENSTACK_CMD server delete $i + done +} + +# Parse command line argument and execute the appropriate function +if [ $# -eq 0 ]; then + echo "Usage: $0 [--get-stuck-vms|--get-errored-vms|--hard-restart-stuck-vms|--remove-errored-vms|--remove-stuck-vms]" + exit 1 +else + case "$1" in + --get-stuck-vms) + get_stuck_vms + ;; + --get-errored-vms) + get_errored_vms + ;; + --get-flavored-vms) + get_flavored_vms "$2" + ;; + --hard-restart-stuck-vms) + hard_restart_stuck_vms + ;; + --remove-errored-vms) + remove_errored_vms + ;; + --remove-stuck-vms) + remove_stuck_vms + ;; + --remove-drained-vms) + remove_drained_vms + ;; + --remove-list-of-vms) + remove_list_of_vms "$2" + ;; + *) + echo "Invalid option: $1" + echo "Usage: $0 [--get-stuck-vms|--get-errored-vms|--hard-restart-stuck-vms|--remove-errored-vms|--remove-stuck-vms]" + exit 1 + ;; + esac +fi diff --git a/maintenance.yml b/maintenance.yml index e22c373f1..4b0dbbb1d 100644 --- a/maintenance.yml +++ b/maintenance.yml @@ -68,6 +68,44 @@ mode: '0644' notify: - restart rsyslog + - name: Configure OpenStack credentials. + become: true + block: + - name: Get $XDG_CONFIG_HOME for root. + ansible.builtin.shell: + executable: /bin/bash + cmd: "set -u; (echo $XDG_CONFIG_HOME) 2> /dev/null || echo $HOME/.config" + changed_when: false + register: root_config + - name: Ensure $XDG_CONFIG_HOME exists. + become: true + ansible.builtin.file: + path: "{{ root_config.stdout }}" + state: directory + owner: root + group: root + - name: Ensure OpenStack configuration directory exists. + become: true + ansible.builtin.file: + path: "{{ root_config.stdout }}/openstack" + state: directory + owner: root + group: root + mode: "0700" + - name: Copy OpenStack credentials. + ansible.builtin.copy: + src: clouds.yaml + dest: "{{ root_config.stdout }}/openstack/clouds.yaml" + owner: root + group: root + mode: "0600" + - name: Copy script to manage VMs. + ansible.builtin.copy: + src: manage_vms + dest: /usr/local/bin/manage_vms + owner: root + group: root + mode: "0555" roles: - usegalaxy_eu.handy.os_setup - geerlingguy.repo-epel