From 58c38e2db0507677b6e8f1538921105dc984bb99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enol=20Fern=C3=A1ndez?= Date: Mon, 24 Jun 2024 07:24:01 +0100 Subject: [PATCH] Use fedcloud secrets (#347) * Do not send long lived secrets to VM Instead use fedcloud secret command with a locker that can only be used 2 times (one for putting the secret, another for getting it) and for 1 hour max. * Move the ansible role to this repository Instead of having this externally managed as it is a pain to update and to keep properly aligned * Move to the embedded role --- .github/workflows/deploy.yml | 37 +++++++----- .github/workflows/molecule.yml | 26 ++++++++ deploy/cloud-init.yaml | 6 +- deploy/deploy.sh | 20 +++++-- deploy/playbook.yaml | 2 +- deploy/roles/catchall/defaults/main.yaml | 22 +++++++ .../catchall/molecule/default/converge.yml | 28 +++++++++ .../catchall/molecule/default/molecule.yml | 13 ++++ .../molecule/default/tests/test_default.py | 30 ++++++++++ deploy/roles/catchall/requirements.txt | 4 ++ deploy/roles/catchall/tasks/cloud-info.yml | 26 ++++++++ deploy/roles/catchall/tasks/docker.yml | 59 +++++++++++++++++++ deploy/roles/catchall/tasks/main.yml | 47 +++++++++++++++ .../catchall/templates/cloud-info.env.j2 | 14 +++++ .../catchall/templates/site-info.yaml.j2 | 9 +++ deploy/roles/catchall/vars/main.yml | 1 + 16 files changed, 321 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/molecule.yml create mode 100644 deploy/roles/catchall/defaults/main.yaml create mode 100644 deploy/roles/catchall/molecule/default/converge.yml create mode 100644 deploy/roles/catchall/molecule/default/molecule.yml create mode 100644 deploy/roles/catchall/molecule/default/tests/test_default.py create mode 100644 deploy/roles/catchall/requirements.txt create mode 100644 deploy/roles/catchall/tasks/cloud-info.yml create mode 100644 deploy/roles/catchall/tasks/docker.yml create mode 100644 deploy/roles/catchall/tasks/main.yml create mode 100644 deploy/roles/catchall/templates/cloud-info.env.j2 create mode 100644 deploy/roles/catchall/templates/site-info.yaml.j2 create mode 100644 deploy/roles/catchall/vars/main.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index e6f6a9ec..58c205e5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -25,17 +25,17 @@ jobs: curl -L https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 > jq chmod +x jq pip install yq git+https://github.com/tdviet/fedcloudclient.git - curl -L https://github.com/oidc-mytoken/client/releases/download/v0.3.0/mytoken_0.3.0_Linux_x86_64.tar.gz \ - | tar -xzf - - mkdir ~/.mytoken - curl https://raw.githubusercontent.com/oidc-mytoken/client/master/config/example-config.yaml > ~/.mytoken/config.yaml - name: Configure providers access env: - MYTOKEN: ${{ secrets.MYTOKEN }} REFRESH_TOKEN: ${{ secrets.REFRESH_TOKEN }} + ANSIBLE_SECRETS: ${{ secrets.ANSIBLE_SECRETS }} run: | + # using parametric scopes to only have access to cloud.egi.eu VO + SCOPE="openid%20email%20profile%20voperson_id" + SCOPE="$SCOPE%20eduperson_entitlement:urn:mace:egi.eu:group:cloud.egi.eu:role=vm_operator#aai.egi.eu" + SCOPE="$SCOPE%20eduperson_entitlement:urn:mace:egi.eu:group:cloud.egi.eu:role=member#aai.egi.eu" OIDC_TOKEN=$(curl -X POST "https://aai.egi.eu/auth/realms/egi/protocol/openid-connect/token" \ - -d "grant_type=refresh_token&refresh_token=$REFRESH_TOKEN&client_id=token-portal&scope=openid%20email%20profile%20voperson_id%20eduperson_entitlement" \ + -d "grant_type=refresh_token&client_id=token-portal&scope=$SCOPE&refresh_token=$REFRESH_TOKEN" \ | jq -r ".access_token") echo "::add-mask::$OIDC_TOKEN" cd deploy @@ -54,6 +54,12 @@ jobs: sed -i -e "s/deploy_secret/$DEPLOY_OS_TOKEN/" clouds.yaml mkdir -p ~/.config/openstack touch ~/.config/openstack/secure.yaml + FEDCLOUD_LOCKER_TOKEN="$(fedcloud secret locker create \ + --oidc-access-token "$OIDC_TOKEN" \ + --ttl 1h --num-uses 2)" + echo "::add-mask::$FEDCLOUD_LOCKER_TOKEN" + fedcloud secret put --locker-token "$FEDCLOUD_LOCKER_TOKEN" deploy "data=$ANSIBLE_SECRETS" + echo "FEDCLOUD_LOCKER_TOKEN=$FEDCLOUD_LOCKER_TOKEN" >> "$GITHUB_ENV" - name: Setup Terraform uses: hashicorp/setup-terraform@v3 with: @@ -71,16 +77,13 @@ jobs: - name: Adjust cloud-init file env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - ANSIBLE_SECRETS: ${{ secrets.ANSIBLE_SECRETS }} run: | cd deploy sed -i -e "s/%TOKEN%/${{ secrets.GITHUB_TOKEN }}/" cloud-init.yaml sed -i -e "s/%REF%/${{ github.sha }}/" cloud-init.yaml sed -i -e "s/%SHORT_REF%/$(git rev-parse --short HEAD)/" cloud-init.yaml sed -i -e "s#%SLACK_WEBHOOK_URL%#$SLACK_WEBHOOK_URL#" cloud-init.yaml - ANSIBLE_ENCODED_SECRETS="$(echo "$ANSIBLE_SECRETS" | base64 -w 0)" - echo "::add-mask::$ANSIBLE_ENCODED_SECRETS" - sed -i -e "s/%ANSIBLE_SECRETS%/$ANSIBLE_ENCODED_SECRETS/" cloud-init.yaml + sed -i -e "s/%FEDCLOUD_LOCKER_TOKEN%/$FEDCLOUD_LOCKER_TOKEN/" cloud-init.yaml sed -i -e "s/%CLOUDS_YAML%/$(base64 -w 0 < clouds.yaml)/" cloud-init.yaml - name: terraform plan id: plan @@ -132,19 +135,24 @@ jobs: terraform output -raw instance-id - name: Re-configure providers access env: - MYTOKEN: ${{ secrets.MYTOKEN }} REFRESH_TOKEN: ${{ secrets.REFRESH_TOKEN }} run: | + # using parametric scopes to only have access to cloud.egi.eu VO + SCOPE="openid%20email%20profile%20voperson_id" + SCOPE="$SCOPE%20eduperson_entitlement:urn:mace:egi.eu:group:cloud.egi.eu:role=vm_operator#aai.egi.eu" + SCOPE="$SCOPE%20eduperson_entitlement:urn:mace:egi.eu:group:cloud.egi.eu:role=member#aai.egi.eu" OIDC_TOKEN=$(curl -X POST "https://aai.egi.eu/auth/realms/egi/protocol/openid-connect/token" \ - -d "grant_type=refresh_token&refresh_token=$REFRESH_TOKEN&client_id=token-portal&scope=openid%20email%20profile%20voperson_id%20eduperson_entitlement" \ + -d "grant_type=refresh_token&refresh_token=$REFRESH_TOKEN&client_id=token-portal&scope=$SCOPE" \ | jq -r ".access_token") echo "::add-mask::$OIDC_TOKEN" cd deploy + git checkout -- clouds.yaml BACKEND_SITE="$(yq -r .clouds.backend.site clouds.yaml)" BACKEND_VO="$(yq -r .clouds.backend.vo clouds.yaml)" BACKEND_OS_TOKEN="$(fedcloud openstack token issue --oidc-access-token "$OIDC_TOKEN" \ --site "$BACKEND_SITE" --vo "$BACKEND_VO" -j | jq -r '.[0].Result.id')" echo "::add-mask::$BACKEND_OS_TOKEN" + echo "BACKEND_OS_TOKEN=$BACKEND_OS_TOKEN" >> "$GITHUB_ENV" sed -i -e "s/backend_secret/$BACKEND_OS_TOKEN/" clouds.yaml mkdir -p ~/.config/openstack touch ~/.config/openstack/secure.yaml @@ -156,10 +164,9 @@ jobs: max_attempts: 20 retry_wait_seconds: 40 command: > - set -x && pushd deploy && - openstack --os-cloud backend object save fedcloud-catchall "${{ steps.terraform-vm-id.outputs.stdout }}" && - openstack --os-cloud backend object delete fedcloud-catchall "${{ steps.terraform-vm-id.outputs.stdout }}" + openstack --os-cloud backend --os-token "$BACKEND_OS_TOKEN" object save fedcloud-catchall "${{ steps.terraform-vm-id.outputs.stdout }}" && + openstack --os-cloud backend --os-token "$BACKEND_OS_TOKEN" object delete fedcloud-catchall "${{ steps.terraform-vm-id.outputs.stdout }}" - name: Look for errors if: github.ref == 'refs/heads/main' && github.event_name == 'push' run: | diff --git a/.github/workflows/molecule.yml b/.github/workflows/molecule.yml new file mode 100644 index 00000000..45453d56 --- /dev/null +++ b/.github/workflows/molecule.yml @@ -0,0 +1,26 @@ +--- +name: Test role + +on: [push, pull_request] + +jobs: + molecule: + name: Runs molecule for the ansible role + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '>=3.9' + - name: Install dependencies + run: | + pip install molecule molecule-plugins[docker] pytest pytest-testinfra + - name: Test Ansible Bootstrap + run: | + cd deploy/roles/catchall + molecule test + env: + PY_COLORS: 1 diff --git a/deploy/cloud-init.yaml b/deploy/cloud-init.yaml index 06864b62..50b49fc5 100644 --- a/deploy/cloud-init.yaml +++ b/deploy/cloud-init.yaml @@ -23,6 +23,8 @@ packages: - ansible - jq - python3-openstackclient + - python3-pip + - python3.10-venv - retry write_files: @@ -40,14 +42,14 @@ write_files: SLACK_WEBHOOK_URL="%SLACK_WEBHOOK_URL%" COMMIT_SHA="%REF%" SHORT_COMMIT_SHA="%SHORT_REF%" + FEDCLOUD_LOCKER_TOKEN="%FEDCLOUD_LOCKER_TOKEN%" # get the repo code and untar at cwd curl -L -H "Accept: application/vnd.github.v3+raw" \ "https://api.github.com/repos/EGI-Federation/fedcloud-catchall-operations/tarball/$COMMIT_SHA" | \ tar xz --strip=1 cd deploy - echo "%ANSIBLE_SECRETS%" | base64 -d > ./secrets.yaml - ./deploy.sh "$OAUTH_TOKEN" "$COMMIT_SHA" \ + ./deploy.sh "$OAUTH_TOKEN" "$COMMIT_SHA" "$FEDCLOUD_LOCKER_TOKEN" \ "$SHORT_COMMIT_SHA" "$SLACK_WEBHOOK_URL" path: /var/lib/cloud/scripts/per-boot/deploy.sh permissions: '0755' diff --git a/deploy/deploy.sh b/deploy/deploy.sh index 252235fb..159be470 100755 --- a/deploy/deploy.sh +++ b/deploy/deploy.sh @@ -1,15 +1,25 @@ #!/bin/sh # Configure current host with ansible -# Expects as arguments the OAUTH_TOKEN, the COMMIT_SHA and the SLACK_WEBHOOK_URL - +# Expects as arguments: +# - a GitHub OAUTH_TOKEN to update the PR +# - the COMMIT_SHA +# - a locker for fedcloud secret to obtain the secrets +# - the SHORT_SHA used for pulling the docker image to use +# - a SLACK_WEBHOOK_URL to report on the status set -e OAUTH_TOKEN="$1" COMMIT_SHA="$2" -SHORT_SHA="$3" -SLACK_WEBHOOK_URL="$4" +FEDCLOUD_SECRET_LOCKER="$3" +SHORT_SHA="$4" +SLACK_WEBHOOK_URL="$5" + +# create a virtual env for fedcloudclient +python3 -m venv "$PWD/.venv" +"$PWD/.venv/bin/pip" install fedcloudclient -ansible-galaxy install git+https://github.com/EGI-Federation/ansible-role-fedcloud-ops.git +"$PWD/.venv/bin/fedcloud" secret get --locker-token "$FEDCLOUD_SECRET_LOCKER" \ + deploy data >secrets.yaml echo "cloud_info_image: \"ghcr.io/egi-federation/fedcloud-cloud-info:sha-$SHORT_SHA\"" >>extra-vars.yaml diff --git a/deploy/playbook.yaml b/deploy/playbook.yaml index 7d9ab46f..61f35053 100644 --- a/deploy/playbook.yaml +++ b/deploy/playbook.yaml @@ -2,7 +2,7 @@ - hosts: all become: true roles: - - role: ansible-role-fedcloud-ops + - role: catchall tags: ["all", "docker"] vars: site_config_dir: ../sites/ diff --git a/deploy/roles/catchall/defaults/main.yaml b/deploy/roles/catchall/defaults/main.yaml new file mode 100644 index 00000000..5f6605cc --- /dev/null +++ b/deploy/roles/catchall/defaults/main.yaml @@ -0,0 +1,22 @@ +# AMS details +ams_project: egi_cloud_info +ams_host: msg.argo.grnet.gr +ams_token: secret + +# check-in endpoint +checkin_token_endpoint: "https://aai.egi.eu/oidc/token" + +# docker image for the cloud info provider +cloud_info_image: egifedcloud/ops-cloud-info:latest + +# site configuration location +site_config_dir: sites + +# No site information as default +sites: [] + +cloud_info_cron: + minute: "4,34" + hour: "*" + weekday: "*" + timeout: "600" diff --git a/deploy/roles/catchall/molecule/default/converge.yml b/deploy/roles/catchall/molecule/default/converge.yml new file mode 100644 index 00000000..2f4ad264 --- /dev/null +++ b/deploy/roles/catchall/molecule/default/converge.yml @@ -0,0 +1,28 @@ +--- +- name: Converge + hosts: all + tasks: + - name: "Include catchall role" + ansible.builtin.include_role: + name: "catchall" + vars: + sites: + - endpoint: https://example.com:5000/v3/ + gocdb: foo.bar + vos: + - auth: + project_id: a123456 + name: sample_vo + - auth: + project_id: b987659 + name: vo.example.com + - endpoint: https://site.org:5000/v3/ + gocdb: bar.foo + region: region1 + vos: + - auth: + project_id: a123456 + name: sample_vo + - auth: + project_id: b987659 + name: vo.example.com diff --git a/deploy/roles/catchall/molecule/default/molecule.yml b/deploy/roles/catchall/molecule/default/molecule.yml new file mode 100644 index 00000000..fcdd0e07 --- /dev/null +++ b/deploy/roles/catchall/molecule/default/molecule.yml @@ -0,0 +1,13 @@ +--- +dependency: + name: galaxy +driver: + name: docker +platforms: + - name: instance + image: ubuntu:latest +lint: ansible-lint --exclude .github/ +provisioner: + name: ansible +verifier: + name: testinfra diff --git a/deploy/roles/catchall/molecule/default/tests/test_default.py b/deploy/roles/catchall/molecule/default/tests/test_default.py new file mode 100644 index 00000000..5fb4704b --- /dev/null +++ b/deploy/roles/catchall/molecule/default/tests/test_default.py @@ -0,0 +1,30 @@ +import hashlib +import os + +import testinfra.utils.ansible_runner + +testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner( + os.environ["MOLECULE_INVENTORY_FILE"] +).get_hosts("all") + + +def test_site_files(host): + endpoint_hash = hashlib.md5(b"https://example.com:5000/v3/").hexdigest() + filename = "foo-bar-%s" % endpoint_hash + assert host.file("/etc/egi/cloud-info/").is_directory + assert host.file("/etc/egi/cloud-info/%s.yaml" % filename).exists + assert not host.file("/etc/egi/cloud-info/%s.env" % filename).contains("OS_REGION") + assert host.file("/etc/egi/cloud-info/%s.env" % filename).exists + assert host.file("/etc/cron.d/cloud-info-%s" % filename).exists + + +def test_site_files_region(host): + endpoint_hash = hashlib.md5(b"https://site.org:5000/v3/").hexdigest() + filename = "bar-foo-%s" % endpoint_hash + assert host.file("/etc/egi/cloud-info/").is_directory + assert host.file("/etc/egi/cloud-info/%s.yaml" % filename).exists + assert host.file("/etc/egi/cloud-info/%s.env" % filename).exists + assert host.file("/etc/egi/cloud-info/%s.env" % filename).contains( + "OS_REGION=region1" + ) + assert host.file("/etc/cron.d/cloud-info-%s" % filename).exists diff --git a/deploy/roles/catchall/requirements.txt b/deploy/roles/catchall/requirements.txt new file mode 100644 index 00000000..c745c484 --- /dev/null +++ b/deploy/roles/catchall/requirements.txt @@ -0,0 +1,4 @@ +molecule +molecule-plugins[docker] +pytest-testinfra +ansible-lint diff --git a/deploy/roles/catchall/tasks/cloud-info.yml b/deploy/roles/catchall/tasks/cloud-info.yml new file mode 100644 index 00000000..a3019ca4 --- /dev/null +++ b/deploy/roles/catchall/tasks/cloud-info.yml @@ -0,0 +1,26 @@ +--- +- name: Cloud-info config directory + ansible.builtin.template: + src: site-info.yaml.j2 + dest: /etc/egi/cloud-info/{{ filename }}.yaml + mode: "600" + +- name: Cloud info env + ansible.builtin.template: + src: cloud-info.env.j2 + dest: /etc/egi/cloud-info/{{ filename }}.env + mode: "600" + +- name: Cloud info cron + ansible.builtin.cron: + name: cloud-info-provider {{ site.gocdb }} + weekday: "{{ cloud_info_cron.weekday }}" + minute: "{{ cloud_info_cron.minute }}" + hour: "{{ cloud_info_cron.hour }}" + user: root + job: > + flock -n -w {{ cloud_info_cron.timeout }} /var/lock/cloud-info/{{ filename }} + docker run --rm -v /etc/egi:/etc/egi:ro + --env-file /etc/egi/cloud-info/{{ filename }}.env + {{ cloud_info_image }} >> /var/log/cloud-info/{{ filename }}.log 2>&1 + cron_file: "cloud-info-{{ filename }}" diff --git a/deploy/roles/catchall/tasks/docker.yml b/deploy/roles/catchall/tasks/docker.yml new file mode 100644 index 00000000..15405be8 --- /dev/null +++ b/deploy/roles/catchall/tasks/docker.yml @@ -0,0 +1,59 @@ +--- +- name: Install dependencies + ansible.builtin.apt: + name: + - apt-transport-https + - ca-certificates + - curl + - gnupg-agent + - software-properties-common + state: present + update_cache: true + +- name: Docker repo key + ansible.builtin.apt_key: + id: 9DC858229FC7DD38854AE2D88D81803C0EBFCD88 + url: https://download.docker.com/linux/ubuntu/gpg + state: present + +- name: Add docker repo + ansible.builtin.apt_repository: + repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + +- name: Install docker + ansible.builtin.apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + state: present + update_cache: true + +- name: Ensure docker config dir is present + ansible.builtin.file: + path: /etc/docker + state: directory + mode: "775" + +- name: Configure docker + ansible.builtin.copy: + # this is very CESNET-MCC specific, may be better to move as configurable + content: | + { + "mtu": 1442, + "exec-opts": ["native.cgroupdriver=systemd"], + "log-driver": "json-file", + "log-opts": { + "max-size": "100m" + }, + "storage-driver": "overlay2" + } + dest: /etc/docker/daemon.json + mode: "660" + +- name: Restart docker + ansible.builtin.systemd: + name: docker + state: restarted + daemon_reload: true diff --git a/deploy/roles/catchall/tasks/main.yml b/deploy/roles/catchall/tasks/main.yml new file mode 100644 index 00000000..09157e11 --- /dev/null +++ b/deploy/roles/catchall/tasks/main.yml @@ -0,0 +1,47 @@ +--- +- name: Ensure cron is available + ansible.builtin.apt: + name: cron + state: present + update_cache: true + +- name: Install docker + ansible.builtin.include_tasks: docker.yml + # this is only executed if explicity requested + tags: ['never', 'docker'] + +- name: Load site configuration + ansible.builtin.include_vars: + file: "{{ item }}" + name: "{{ 'site_incl_vars_' ~ item | basename | splitext | first }}" + with_fileglob: + - "{{ site_config_dir }}/*.yaml" + +- name: Set site configuration variable + ansible.builtin.set_fact: + sites: "{{ sites | default([]) + [lookup('vars', item)] }}" + loop: "{{ query('varnames', '^site_incl_vars_(.*)$') }}" + +- name: EGI configuration + block: + - name: Create directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: "755" + owner: 1999 + group: 1999 + loop: + - /etc/egi + - /etc/egi/vos + - /etc/egi/cloud-info + - /var/lock/cloud-info + - /var/log/cloud-info + - name: Site specific config + ansible.builtin.include_tasks: cloud-info.yml + vars: + site: "{{ item }}" + filename: "{{ item.gocdb | replace('.', '-') }}-{{ item.endpoint | hash('md5') }}" + with_items: + - "{{ sites }}" + when: sites is iterable diff --git a/deploy/roles/catchall/templates/cloud-info.env.j2 b/deploy/roles/catchall/templates/cloud-info.env.j2 new file mode 100644 index 00000000..9c7c6e2a --- /dev/null +++ b/deploy/roles/catchall/templates/cloud-info.env.j2 @@ -0,0 +1,14 @@ +AMS_HOST={{ ams_host }} +AMS_PROJECT={{ ams_project }} +AMS_TOKEN={{ ams_token }} +CHECKIN_OIDC_TOKEN={{ checkin_token_endpoint }} +CHECKIN_SECRETS_PATH=/etc/egi/vos/ +CLOUD_INFO_CONFIG=/etc/egi/cloud-info/{{ filename }}.yaml +OS_AUTH_TYPE=v3oidcaccesstoken +OS_AUTH_URL={{ site.endpoint }} +OS_IDENTITY_PROVIDER=egi.eu +OS_PROTOCOL={{ site.protocol | default('openid') }} +{% if "region" in site %} +OS_REGION={{ site.region }} +{% endif %} +SITE_NAME={{ site.gocdb }} diff --git a/deploy/roles/catchall/templates/site-info.yaml.j2 b/deploy/roles/catchall/templates/site-info.yaml.j2 new file mode 100644 index 00000000..293e3ccf --- /dev/null +++ b/deploy/roles/catchall/templates/site-info.yaml.j2 @@ -0,0 +1,9 @@ +site: + name: {{ site.gocdb }} + +compute: + shares: +{% for vo in site.vos %} + {{ vo.name }}: + {{ vo | default({}) | to_nice_yaml(indent=2) | indent(6) }} +{% endfor %} diff --git a/deploy/roles/catchall/vars/main.yml b/deploy/roles/catchall/vars/main.yml new file mode 100644 index 00000000..44b09048 --- /dev/null +++ b/deploy/roles/catchall/vars/main.yml @@ -0,0 +1 @@ +# Role variables