-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathopenedx_mongodb_clean.yml
111 lines (99 loc) · 4.8 KB
/
openedx_mongodb_clean.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Ansible playbook to:
#
# Recover space on MongoDB for edx-platform by deleting unreachable, historical course content data.
#
# This playbook is intended to be run periodically at a long interval, such as weekly or monthly,
# at a time when the platform isn't in use, for example during the night from Saturday to Sunday.
#
# Example: generate the change plan, but don't delete anything:
# ansible-playbook -i nau-data/envs/development/hosts.ini openedx_mongodb_clean.yml
#
# Example: generate the change plan and then delete/prune the older data:
# ansible-playbook -i nau-data/envs/development/hosts.ini openedx_mongodb_clean.yml -e prune=true
#
---
# Play: builds a docker image from the Open edX "tubular" repository and uses
# its structures.py script to (1) generate a change plan and, optionally,
# (2) prune the data listed in that plan from the edxapp MongoDB database.
#
# Optional extra vars read by this play:
#   target    - inventory pattern intersected with mongo_docker_servers
#   version   - tubular git ref to check out
#   repo      - tubular git repository URL
#   make_plan - generate a new change plan (default: true)
#   prune     - run the prune step against the plan (default: false)
- name: Clean unreachable historical course structures from the Open edX MongoDB
  # By default, run on the last mongo server in the inventory.
  hosts: "mongo_docker_servers:&{{ target | default('mongo_docker_servers') }}[-1]"
  become: true
  # Facts are needed because _tubular_timestamp reads ansible_date_time.
  gather_facts: true
  vars:
    _tubular_version: "{{ version | default('open-release/nutmeg.master') }}"
    _tubular_repo: "{{ repo | default('https://github.com/openedx/tubular.git') }}"
    # The repository is cloned directly into the base directory; the Dockerfile
    # and the output folder are created inside it.
    _tubular_base_directory: /tmp/openedx-tubular
    # NOTE(review): the next two variables are not referenced by any task in
    # this play; kept in case something else relies on them.
    _tubular_git_directory: "/tmp/openedx-tubular/tubular"
    _tubular_virtualenv_directory: "/tmp/openedx-tubular/venv"
    _tubular_output_directory: "{{ _tubular_base_directory }}/output"
    _tubular_make_plan: "{{ make_plan | default(true) | bool }}"
    _tubular_prune: "{{ prune | default(false) | bool }}"
    _tubular_docker_image: "openedx_tubular:latest"
    # The output directory on the host is mounted as /data in the container,
    # so every /data/... path below maps to _tubular_output_directory.
    _tubular_docker_run: "docker run -t --rm --name openedx-tubular -v {{ _tubular_output_directory }}/:/data {{ _tubular_docker_image }}"
    # mongodb://user:password@host1:port,host2:port,.../dbname
    _tubular_mongodb_connection: "mongodb://{{ EDXAPP_MONGO_USER }}:{{ EDXAPP_MONGO_PASSWORD }}@{%- for host in EDXAPP_MONGO_HOSTS -%}{{ host }}:{{ EDXAPP_MONGO_PORT }}{%- if not loop.last -%},{%- endif -%}{%- endfor -%}/{{ EDXAPP_MONGO_DB_NAME }}"
    _tubular_timestamp: "{{ ansible_date_time.iso8601_basic_short }}"  # Example: 20181025T120844

  tasks:
    - name: Clone tubular tool
      git:
        repo: "{{ _tubular_repo }}"
        dest: "{{ _tubular_base_directory }}"
        version: "{{ _tubular_version }}"
        # Discard local modifications to tracked files, e.g. the Dockerfile
        # that a previous run of this play has overwritten.
        force: true

    - name: Create output folder
      file:
        path: "{{ _tubular_output_directory }}"
        state: directory

    # We need a custom Dockerfile because the one that exists in the
    # repository is incompatible with the pip requirements.
    - name: Template new Dockerfile
      copy:
        dest: "{{ _tubular_base_directory }}/Dockerfile"
        content: |
          FROM python:3.8-buster
          WORKDIR /app
          ADD . /app
          RUN pip install .

    - name: Build docker image
      command: docker build -t {{ _tubular_docker_image }} .
      args:
        chdir: "{{ _tubular_base_directory }}"

    # The command line embeds the MongoDB credentials. `command` is used
    # instead of `shell` so that characters such as `$` or backticks in the
    # password are not expanded by a shell.
    - name: Runs make_plan on structures.py to make a change plan
      command: >-
        {{ _tubular_docker_run }} structures.py
        --connection="{{ _tubular_mongodb_connection }}"
        --database-name {{ EDXAPP_MONGO_DB_NAME }}
        make_plan -v DEBUG /data/out.json
        --details /data/details.txt
        --delay 2000
        --batch-size 1000
      register: make_plan_output
      when: _tubular_make_plan | bool

    - name: Print stdout of make_plan output
      debug:
        msg: "{{ make_plan_output.stdout_lines }}"
      when:
        - _tubular_make_plan | bool
        - make_plan_output.stdout_lines is defined

    - name: Print stderr of make_plan output
      debug:
        msg: "{{ make_plan_output.stderr_lines }}"
      when:
        - _tubular_make_plan | bool
        - make_plan_output.stderr_lines is defined

    - name: Install s3cmd
      package:
        name: s3cmd
        state: present
      when: _tubular_make_plan | bool

    # Save both files produced by make_plan (out.json and details.txt) to an
    # S3 bucket, under a per-run timestamped prefix. Each loop item is used
    # as both the local file name and the remote object name.
    - name: Save the plan to S3
      command: >-
        s3cmd put {{ _tubular_output_directory }}/{{ item }}
        s3://{{ AWS_S3_DBS_BACKUP_BUCKET_NAME }}/tubular/{{ _tubular_timestamp }}/{{ item }}
        --host {{ AWS_S3_DBS_BACKUP_HOST }}
        --host-bucket {{ AWS_S3_DBS_BACKUP_BUCKET_NAME }}
      environment:
        AWS_ACCESS_KEY_ID: "{{ EDXAPP_BACKUPS_AWS_ACCESS_KEY_ID }}"
        AWS_SECRET_ACCESS_KEY: "{{ EDXAPP_BACKUPS_AWS_SECRET_ACCESS_KEY }}"
      when: _tubular_make_plan | bool
      loop:
        - out.json
        - details.txt

    # Destructive step: only runs when prune=true is passed. It consumes the
    # /data/out.json plan found in the output directory.
    - name: Runs prune on the structures.py to delete/prune the older course structures
      command: >-
        {{ _tubular_docker_run }} structures.py
        --connection="{{ _tubular_mongodb_connection }}"
        --database-name {{ EDXAPP_MONGO_DB_NAME }}
        prune /data/out.json
        --delay 2000
        --batch-size 1000
      register: prune_output
      when: _tubular_prune | bool

    - name: Print stdout of prune output
      debug:
        msg: "{{ prune_output.stdout_lines }}"
      when:
        - _tubular_prune | bool
        - prune_output.stdout_lines is defined

    - name: Print stderr of prune output
      debug:
        msg: "{{ prune_output.stderr_lines }}"
      when:
        - _tubular_prune | bool
        - prune_output.stderr_lines is defined