From eaceb4fdcd01729c65ed60f0a321a1fb5ced59a2 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Tue, 28 Nov 2023 18:44:09 -0500
Subject: [PATCH 1/8] initiate script

---
 scripts/drop_spells_removed_from_spellbook.py | 161 ++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 scripts/drop_spells_removed_from_spellbook.py

diff --git a/scripts/drop_spells_removed_from_spellbook.py b/scripts/drop_spells_removed_from_spellbook.py
new file mode 100644
index 00000000000..74656119724
--- /dev/null
+++ b/scripts/drop_spells_removed_from_spellbook.py
@@ -0,0 +1,161 @@
+import subprocess
+import json
+import os
+
+env = 'dev' # Change this based on your needs
+
+# step 1: List dbt models and output in JSON format
+dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json']
+dbt_output_bytes = subprocess.check_output(dbt_command)
+dbt_output_str = dbt_output_bytes.decode('utf-8')
+dbt_lines = dbt_output_str.splitlines()
+dbt_json_objects = [line for line in dbt_lines if line.strip().startswith('{')]
+dbt_data_list = [json.loads(obj) for obj in dbt_json_objects]
+
+# step 2: Iterate through each JSON object and categorize based on 'materialized'
+view_models_dbt = []
+table_models_dbt = []
+
+for data in dbt_data_list:
+    materialized = data.get('config', {}).get('materialized', '').lower()
+    schema = data.get('config', {}).get('schema', 'schema_not_found')
+    alias = data.get('config', {}).get('alias', 'alias_not_found')
+
+    if materialized == 'view':
+        view_models_dbt.append(f"{schema}.{alias}")
+    elif materialized == 'table' or materialized == 'incremental':
+        table_models_dbt.append(f"{schema}.{alias}")
+
+# # Print the results for dbt models
+# print("DBT View Models:")
+# print("\n".join(view_models_dbt))
+
+# print("\nDBT Table or Incremental Models:")
+# print("\n".join(table_models_dbt))
+
+# step 3: build function to run psql queries set below
+def run_psql_command(sql_query):
+    psql_command = [
+        'psql',
+        '-h', postgres_host,
+        '-p', str(postgres_port),
+        '-U', postgres_user,
+        '-t',
+        '-c', sql_query
+    ]
+
+    # Use subprocess.run without stdin and pass the password through the environment
+    psql_process = subprocess.run(
+        psql_command,
+        text=True,
+        env=dict(os.environ, PGPASSWORD=postgres_password),
+        capture_output=True # Capture both stdout and stderr
+    )
+
+    # Check for errors
+    if psql_process.returncode != 0:
+        print("Error executing psql command:")
+        print("psql_process.stderr:", psql_process.stderr)
+        return []
+
+    # Use stdout instead of psql_process.stdout
+    result_lines = psql_process.stdout.splitlines()
+
+    # Remove the last element if it's an empty string
+    if not result_lines[-1]:
+        result_lines.pop()
+
+    return result_lines
+
+# step 4: Determine PostgreSQL connection details and SQL queries based on the environment
+# Set common PostgreSQL connection details
+postgres_port = 5432
+postgres_user = "hive"
+
+if env == 'dev':
+    postgres_host = "dev-spellbook-metastore-db"
+    postgres_password = os.environ.get("SH_METASTORE_DEV_PASS")
+
+    # SQL query for dev environment (tables)
+    tables_sql_query = f"""
+    SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME"
+    FROM "TBLS" t
+    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
+    WHERE t."OWNER_TYPE" = 'USER'
+    AND t."OWNER" = 'admin'
+    AND t."TBL_TYPE" = 'EXTERNAL_TABLE'
+    AND d."NAME" LIKE 'dbt_jeff_dude_%';
+    """
+
+    # SQL query for dev environment (views)
+    views_sql_query = f"""
+    SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME"
+    FROM "TBLS" t
+    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
+    WHERE t."OWNER_TYPE" = 'USER'
+    AND t."OWNER" = 'admin'
+    AND t."TBL_TYPE" = 'VIRTUAL_VIEW'
+    AND d."NAME" LIKE 'dbt_jeff_dude_%';
+    """
+
+elif env == 'prod':
+    postgres_host = "prod-metastore-db"
+    postgres_password = os.environ.get("SH_METASTORE_PROD_PASS")
+
+    # SQL query for prod environment (tables)
+    tables_sql_query = f"""
+    SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
+    FROM "TBLS" t
+    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
+    JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
+    WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
+    AND tp."PARAM_VALUE" = 'abstraction'
+    AND t."OWNER_TYPE" = 'USER'
+    AND t."OWNER" = 'spellbook'
+    AND t."TBL_TYPE" = 'EXTERNAL_TABLE';
+    """
+
+    # SQL query for prod environment (views)
+    views_sql_query = f"""
+    SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
+    FROM "TBLS" t
+    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
+    JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
+    WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
+    AND tp."PARAM_VALUE" = 'abstraction'
+    AND t."OWNER_TYPE" = 'USER'
+    AND t."OWNER" = 'spellbook'
+    AND t."TBL_TYPE" = 'VIRTUAL_VIEW';
+    """
+
+else:
+    raise ValueError("Invalid environment. Use 'dev' or 'prod'.")
+
+
+# step 5: run psql query for tables
+psql_tables = run_psql_command(tables_sql_query)
+# Trim whitespace from PostgreSQL tables
+psql_tables = [table.strip() for table in psql_tables]
+
+# step 6: run psql query for views
+psql_views = run_psql_command(views_sql_query)
+# Trim whitespace from PostgreSQL views
+psql_views = [view.strip() for view in psql_views]
+
+# # Print the results for PostgreSQL tables
+# print("\nPostgreSQL views:")
+# print("\n".join(psql_views))
+
+# step 7: Compare psql_views vs. view_models_dbt
+print("\nViews in PostgreSQL but not in DBT:")
+for view in psql_views:
+    if view not in view_models_dbt:
+        # Add a print statement for dropping the view
+        print(f"DROP VIEW IF EXISTS {view};")
+
+# step 8: Compare psql_tables vs. table_models_dbt
+print("\nTables in PostgreSQL but not in DBT:")
+for table in psql_tables:
+    if table not in table_models_dbt:
+        # Add a print statement for dropping the table
+        print(f"DROP TABLE IF EXISTS {table};")
\ No newline at end of file
|| t."TBL_NAME" + FROM "TBLS" t + JOIN "DBS" d ON d."DB_ID" = t."DB_ID" + JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID" + WHERE tp."PARAM_KEY" = 'dune.data_explorer.category' + AND tp."PARAM_VALUE" = 'abstraction' + AND t."OWNER_TYPE" = 'USER' + AND t."OWNER" = 'spellbook' + AND t."TBL_TYPE" = 'EXTERNAL_TABLE'; + """ + else: + raise ValueError("Invalid environment. Use 'dev' or 'prod'.") + +def generate_views_query(env): + if env == DEV_ENV: + return """ + SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME" + FROM "TBLS" t + JOIN "DBS" d ON d."DB_ID" = t."DB_ID" + WHERE t."OWNER_TYPE" = 'USER' + AND t."OWNER" = 'admin' + AND t."TBL_TYPE" = 'VIRTUAL_VIEW' + AND d."NAME" LIKE 'dbt_jeff_dude_%'; + """ + elif env == PROD_ENV: + return """ + SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME" + FROM "TBLS" t + JOIN "DBS" d ON d."DB_ID" = t."DB_ID" + JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID" + WHERE tp."PARAM_KEY" = 'dune.data_explorer.category' + AND tp."PARAM_VALUE" = 'abstraction' + AND t."OWNER_TYPE" = 'USER' + AND t."OWNER" = 'spellbook' + AND t."TBL_TYPE" = 'VIRTUAL_VIEW'; + """ + else: + raise ValueError("Invalid environment. Use 'dev' or 'prod'.") + +def run_psql_command(sql_query, env): + postgres_host = "dev-spellbook-metastore-db" if env == DEV_ENV else "prod-metastore-db" + postgres_password = os.environ.get("SH_METASTORE_DEV_PASS") if env == DEV_ENV else os.environ.get("SH_METASTORE_PROD_PASS") -# step 1: List dbt models and output in JSON format -dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json'] -dbt_output_bytes = subprocess.check_output(dbt_command) -dbt_output_str = dbt_output_bytes.decode('utf-8') -dbt_lines = dbt_output_str.splitlines() -dbt_json_objects = [line for line in dbt_lines if line.strip().startswith('{')] -dbt_data_list = [json.loads(obj) for obj in dbt_json_objects] - -# step 2: Iterate through each JSON object and categorize based on 'materialized' -view_models_dbt = [] -table_models_dbt = [] - -for data in dbt_data_list: - materialized = data.get('config', {}).get('materialized', '').lower() - schema = data.get('config', {}).get('schema', 'schema_not_found') - alias = data.get('config', {}).get('alias', 'alias_not_found') - - if materialized == 'view': - view_models_dbt.append(f"{schema}.{alias}") - elif materialized == 'table' or materialized == 'incremental': - table_models_dbt.append(f"{schema}.{alias}") - -# # Print the results for dbt models -# print("DBT View Models:") -# print("\n".join(view_models_dbt)) - -# print("\nDBT Table or Incremental Models:") -# print("\n".join(table_models_dbt)) - -# step 3: build function to run psql queries set below -def run_psql_command(sql_query): psql_command = [ 'psql', '-h', postgres_host, - '-p', str(postgres_port), - '-U', postgres_user, + '-p', '5432', + '-U', 'hive', '-t', '-c', sql_query ] - # Use subprocess.run without stdin and pass the password through the environment psql_process = subprocess.run( psql_command, text=True, env=dict(os.environ, PGPASSWORD=postgres_password), - capture_output=True # Capture both stdout and stderr + capture_output=True ) - # Check for errors if psql_process.returncode != 0: print("Error executing psql command:") print("psql_process.stderr:", psql_process.stderr) return [] - # Use stdout instead of psql_process.stdout result_lines = psql_process.stdout.splitlines() - # Remove the last element if it's an empty string if not result_lines[-1]: result_lines.pop() return result_lines -# step 4: Determine PostgreSQL connection 
details and SQL queries based on the environment -# Set common PostgreSQL connection details -postgres_port = 5432 -postgres_user = "hive" - -if env == 'dev': - postgres_host = "dev-spellbook-metastore-db" - postgres_password = os.environ.get("SH_METASTORE_DEV_PASS") - - # SQL query for dev environment (tables) - tables_sql_query = f""" - SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME" - FROM "TBLS" t - JOIN "DBS" d ON d."DB_ID" = t."DB_ID" - WHERE t."OWNER_TYPE" = 'USER' - AND t."OWNER" = 'admin' - AND t."TBL_TYPE" = 'EXTERNAL_TABLE' - AND d."NAME" LIKE 'dbt_jeff_dude_%'; - """ - - # SQL query for dev environment (views) - views_sql_query = f""" - SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME" - FROM "TBLS" t - JOIN "DBS" d ON d."DB_ID" = t."DB_ID" - WHERE t."OWNER_TYPE" = 'USER' - AND t."OWNER" = 'admin' - AND t."TBL_TYPE" = 'VIRTUAL_VIEW' - AND d."NAME" LIKE 'dbt_jeff_dude_%'; - """ - -elif env == 'prod': - postgres_host = "prod-metastore-db" - postgres_password = os.environ.get("SH_METASTORE_PROD_PASS") - - # SQL query for prod environment (tables) - tables_sql_query = f""" - SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME" - FROM "TBLS" t - JOIN "DBS" d ON d."DB_ID" = t."DB_ID" - JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID" - WHERE tp."PARAM_KEY" = 'dune.data_explorer.category' - AND tp."PARAM_VALUE" = 'abstraction' - AND t."OWNER_TYPE" = 'USER' - AND t."OWNER" = 'spellbook' - AND t."TBL_TYPE" = 'EXTERNAL_TABLE'; - """ - - # SQL query for prod environment (views) - views_sql_query = f""" - SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME" - FROM "TBLS" t - JOIN "DBS" d ON d."DB_ID" = t."DB_ID" - JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID" - WHERE tp."PARAM_KEY" = 'dune.data_explorer.category' - AND tp."PARAM_VALUE" = 'abstraction' - AND t."OWNER_TYPE" = 'USER' - AND t."OWNER" = 'spellbook' - AND t."TBL_TYPE" = 'VIRTUAL_VIEW'; - """ - -else: - raise ValueError("Invalid environment. 
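Note: the two generator functions introduced in this patch differ only in the `TBL_TYPE` literal ('EXTERNAL_TABLE' vs. 'VIRTUAL_VIEW'). A single builder parameterized on table type would halve the duplicated SQL — a sketch under that assumption; `generate_query` is hypothetical and reuses the script's DEV_ENV/PROD_ENV constants:

    def generate_query(env, tbl_type):
        # tbl_type: 'EXTERNAL_TABLE' for tables, 'VIRTUAL_VIEW' for views.
        if env == DEV_ENV:
            return f"""
            SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME"
            FROM "TBLS" t
            JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
            WHERE t."OWNER_TYPE" = 'USER'
            AND t."OWNER" = 'admin'
            AND t."TBL_TYPE" = '{tbl_type}'
            AND d."NAME" LIKE 'dbt_jeff_dude_%';
            """
        elif env == PROD_ENV:
            return f"""
            SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
            FROM "TBLS" t
            JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
            JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
            WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
            AND tp."PARAM_VALUE" = 'abstraction'
            AND t."OWNER_TYPE" = 'USER'
            AND t."OWNER" = 'spellbook'
            AND t."TBL_TYPE" = '{tbl_type}';
            """
        raise ValueError("Invalid environment. Use 'dev' or 'prod'.")

Call sites would then read generate_query(env, 'EXTERNAL_TABLE') and generate_query(env, 'VIRTUAL_VIEW').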
From 8778f88012fd006bdd937c9e79ad318754046372 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Wed, 29 Nov 2023 14:08:59 -0500
Subject: [PATCH 3/8] use dev, focus on one spell

---
 scripts/drop_spells_removed_from_spellbook.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/drop_spells_removed_from_spellbook.py b/scripts/drop_spells_removed_from_spellbook.py
index e4b61820a4a..a9dbf8bfb83 100644
--- a/scripts/drop_spells_removed_from_spellbook.py
+++ b/scripts/drop_spells_removed_from_spellbook.py
@@ -15,7 +15,7 @@ def generate_tables_query(env):
         WHERE t."OWNER_TYPE" = 'USER'
         AND t."OWNER" = 'admin'
         AND t."TBL_TYPE" = 'EXTERNAL_TABLE'
-        AND d."NAME" LIKE 'dbt_jeff_dude_%';
+        AND d."NAME" LIKE 'dbt_jeff_dude_carbonhood_%';
         """
     elif env == PROD_ENV:
         return """
@@ -41,7 +41,7 @@ def generate_views_query(env):
         WHERE t."OWNER_TYPE" = 'USER'
         AND t."OWNER" = 'admin'
         AND t."TBL_TYPE" = 'VIRTUAL_VIEW'
-        AND d."NAME" LIKE 'dbt_jeff_dude_%';
+        AND d."NAME" LIKE 'dbt_jeff_dude_carbonhood_%';
         """
     elif env == PROD_ENV:
         return """
@@ -91,7 +91,7 @@ def run_psql_command(sql_query, env):
     return result_lines
 
 # Main script
-env = PROD_ENV # Change this based on your needs
+env = DEV_ENV # Change this based on your needs

From 36277c2d68560686de57a1a9f970093e60f6b116 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Wed, 29 Nov 2023 17:40:41 -0500
Subject: [PATCH 4/8] update schema filter

---
 scripts/drop_spells_removed_from_spellbook.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/drop_spells_removed_from_spellbook.py b/scripts/drop_spells_removed_from_spellbook.py
index a9dbf8bfb83..df2376ade96 100644
--- a/scripts/drop_spells_removed_from_spellbook.py
+++ b/scripts/drop_spells_removed_from_spellbook.py
@@ -15,7 +15,7 @@ def generate_tables_query(env):
         WHERE t."OWNER_TYPE" = 'USER'
         AND t."OWNER" = 'admin'
         AND t."TBL_TYPE" = 'EXTERNAL_TABLE'
-        AND d."NAME" LIKE 'dbt_jeff_dude_carbonhood_%';
+        AND d."NAME" LIKE 'dbt_jeff_dude_dex%';
         """
     elif env == PROD_ENV:
         return """
@@ -41,7 +41,7 @@ def generate_views_query(env):
         WHERE t."OWNER_TYPE" = 'USER'
         AND t."OWNER" = 'admin'
         AND t."TBL_TYPE" = 'VIRTUAL_VIEW'
-        AND d."NAME" LIKE 'dbt_jeff_dude_carbonhood_%';
+        AND d."NAME" LIKE 'dbt_jeff_dude_dex%';
         """
     elif env == PROD_ENV:
         return """
@@ -95,6 +95,7 @@ def run_psql_command(sql_query, env):
 
 # Step 1: List dbt models and output in JSON format
 dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json']
+# dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json', '--exclude', 'dex_trades_beta']
 dbt_output_bytes = subprocess.check_output(dbt_command)
 dbt_output_str = dbt_output_bytes.decode('utf-8')
 dbt_lines = dbt_output_str.splitlines()
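Note: the final comparison loops test membership with `in` against Python lists, which costs O(n) per lookup. Since both sides are plain "schema.table" strings, the same result is a set difference, which also makes the output order deterministic. A sketch reusing the script's own variables:

    # Relations present in the metastore but no longer defined in dbt.
    orphaned_views = sorted(set(psql_views) - set(view_models_dbt))
    orphaned_tables = sorted(set(psql_tables) - set(table_models_dbt))

    for view in orphaned_views:
        print(f"DROP VIEW IF EXISTS {view};")
    for table in orphaned_tables:
        print(f"DROP TABLE IF EXISTS {table};")
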
From 8756bc1d564c5e4860fcdd7ffd10c32b9f492567 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Tue, 5 Dec 2023 18:24:35 -0500
Subject: [PATCH 5/8] rename script, create gh action

---
 .github/workflows/drop_deprecated_spells.yml  |  40 +++++
 scripts/drop_deprecated_spells.py             | 109 ++++++++++++
 scripts/drop_spells_removed_from_spellbook.py | 141 ------------------
 3 files changed, 149 insertions(+), 141 deletions(-)
 create mode 100644 .github/workflows/drop_deprecated_spells.yml
 create mode 100644 scripts/drop_deprecated_spells.py
 delete mode 100644 scripts/drop_spells_removed_from_spellbook.py

diff --git a/.github/workflows/drop_deprecated_spells.yml b/.github/workflows/drop_deprecated_spells.yml
new file mode 100644
index 00000000000..9711245ffb9
--- /dev/null
+++ b/.github/workflows/drop_deprecated_spells.yml
@@ -0,0 +1,40 @@
+# .github/workflows/drop_deprecated_spells.yml
+name: Drop Deprecated Spells
+
+on:
+  schedule:
+    - cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC)
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run-script:
+    runs-on: [ self-hosted, linux, spellbook-trino-ci ]
+    timeout-minutes: 10 # Timeout set to 10 minutes
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v3
+        with:
+          ref: main # Specify the main branch
+
+      - name: Setup Python environment
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.9' # or whichever version your script requires
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install psycopg2-binary # Add other dependencies here
+
+      - name: Set environment variables
+        run: |
+          echo "SH_METASTORE_PROD_PASS=${{ secrets.SH_METASTORE_PROD_PASS }}" >> $GITHUB_ENV
+          # Set any other necessary environment variables here
+
+      - name: Run Python script
+        run: python scripts/drop_deprecated_spells.py
diff --git a/scripts/drop_deprecated_spells.py b/scripts/drop_deprecated_spells.py
new file mode 100644
index 00000000000..f65bb58a791
--- /dev/null
+++ b/scripts/drop_deprecated_spells.py
@@ -0,0 +1,109 @@
+import subprocess
+import json
+import os
+
+def generate_tables_query():
+    return """
+    SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
+    FROM "TBLS" t
+    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
+    JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
+    WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
+    AND tp."PARAM_VALUE" = 'abstraction'
+    AND t."OWNER_TYPE" = 'USER'
+    AND t."OWNER" = 'spellbook'
+    AND t."TBL_TYPE" = 'EXTERNAL_TABLE';
+    """
+
+def generate_views_query():
+    return """
+    SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
+    FROM "TBLS" t
+    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
+    JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
+    WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
+    AND tp."PARAM_VALUE" = 'abstraction'
+    AND t."OWNER_TYPE" = 'USER'
+    AND t."OWNER" = 'spellbook'
+    AND t."TBL_TYPE" = 'VIRTUAL_VIEW';
+    """
+
+def run_psql_command(sql_query):
+    postgres_host = "prod-metastore-db"
+    postgres_password = os.environ.get("SH_METASTORE_PROD_PASS")
+
+    psql_command = [
+        'psql',
+        '-h', postgres_host,
+        '-p', '5432',
+        '-U', 'hive',
+        '-t',
+        '-c', sql_query
+    ]
+
+    psql_process = subprocess.run(
+        psql_command,
+        text=True,
+        env=dict(os.environ, PGPASSWORD=postgres_password),
+        capture_output=True
+    )
+
+    if psql_process.returncode != 0:
+        print("Error executing psql command:")
+        print("psql_process.stderr:", psql_process.stderr)
+        return []
+
+    result_lines = psql_process.stdout.splitlines()
+
+    if not result_lines[-1]:
+        result_lines.pop()
+
+    return result_lines
+
+# Main script
+
+# Step 1: List dbt models and output in JSON format
+dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json']
+dbt_output_bytes = subprocess.check_output(dbt_command)
+dbt_output_str = dbt_output_bytes.decode('utf-8')
+dbt_lines = dbt_output_str.splitlines()
+dbt_json_objects = [line for line in dbt_lines if line.strip().startswith('{')]
+dbt_data_list = [json.loads(obj) for obj in dbt_json_objects]
+
+# Iterate through each JSON object and categorize based on 'materialized'
+view_models_dbt = []
+table_models_dbt = []
+
+for data in dbt_data_list:
+    materialized = data.get('config', {}).get('materialized', '').lower()
+    schema = data.get('config', {}).get('schema', 'schema_not_found')
+    alias = data.get('config', {}).get('alias', 'alias_not_found')
+
+    if materialized == 'view':
+        view_models_dbt.append(f"{schema}.{alias}")
+    elif materialized == 'table' or materialized == 'incremental':
+        table_models_dbt.append(f"{schema}.{alias}")
+
+# Generate SQL queries
+tables_sql_query = generate_tables_query()
+views_sql_query = generate_views_query()
+
+# Run psql queries for tables and views
+psql_tables = run_psql_command(tables_sql_query)
+psql_views = run_psql_command(views_sql_query)
+
+# Trim whitespace from PostgreSQL tables and views
+psql_tables = [table.strip() for table in psql_tables]
+psql_views = [view.strip() for view in psql_views]
+
+# Compare psql_views vs. view_models_dbt
+print("\nViews in PostgreSQL but not in DBT:")
+for view in psql_views:
+    if view not in view_models_dbt:
+        print(f"DROP VIEW IF EXISTS {view};")
+
+# Compare psql_tables vs. table_models_dbt
+print("\nTables in PostgreSQL but not in DBT:")
+for table in psql_tables:
+    if table not in table_models_dbt:
+        print(f"DROP TABLE IF EXISTS {table};")
diff --git a/scripts/drop_spells_removed_from_spellbook.py b/scripts/drop_spells_removed_from_spellbook.py
deleted file mode 100644
index df2376ade96..00000000000
--- a/scripts/drop_spells_removed_from_spellbook.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import subprocess
-import json
-import os
-
-# Constants
-DEV_ENV = 'dev'
-PROD_ENV = 'prod'
-
-def generate_tables_query(env):
-    if env == DEV_ENV:
-        return """
-        SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME"
-        FROM "TBLS" t
-        JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
-        WHERE t."OWNER_TYPE" = 'USER'
-        AND t."OWNER" = 'admin'
-        AND t."TBL_TYPE" = 'EXTERNAL_TABLE'
-        AND d."NAME" LIKE 'dbt_jeff_dude_dex%';
-        """
-    elif env == PROD_ENV:
-        return """
-        SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
-        FROM "TBLS" t
-        JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
-        JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
-        WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
-        AND tp."PARAM_VALUE" = 'abstraction'
-        AND t."OWNER_TYPE" = 'USER'
-        AND t."OWNER" = 'spellbook'
-        AND t."TBL_TYPE" = 'EXTERNAL_TABLE';
-        """
-    else:
-        raise ValueError("Invalid environment. Use 'dev' or 'prod'.")
-
-def generate_views_query(env):
-    if env == DEV_ENV:
-        return """
-        SELECT DISTINCT REPLACE(d."NAME", 'dbt_jeff_dude_', '') || '.' || t."TBL_NAME"
-        FROM "TBLS" t
-        JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
-        WHERE t."OWNER_TYPE" = 'USER'
-        AND t."OWNER" = 'admin'
-        AND t."TBL_TYPE" = 'VIRTUAL_VIEW'
-        AND d."NAME" LIKE 'dbt_jeff_dude_dex%';
-        """
-    elif env == PROD_ENV:
-        return """
-        SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
-        FROM "TBLS" t
-        JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
-        JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
-        WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
-        AND tp."PARAM_VALUE" = 'abstraction'
-        AND t."OWNER_TYPE" = 'USER'
-        AND t."OWNER" = 'spellbook'
-        AND t."TBL_TYPE" = 'VIRTUAL_VIEW';
-        """
-    else:
-        raise ValueError("Invalid environment. Use 'dev' or 'prod'.")
-
-def run_psql_command(sql_query, env):
-    postgres_host = "dev-spellbook-metastore-db" if env == DEV_ENV else "prod-metastore-db"
-    postgres_password = os.environ.get("SH_METASTORE_DEV_PASS") if env == DEV_ENV else os.environ.get("SH_METASTORE_PROD_PASS")
-
-    psql_command = [
-        'psql',
-        '-h', postgres_host,
-        '-p', '5432',
-        '-U', 'hive',
-        '-t',
-        '-c', sql_query
-    ]
-
-    psql_process = subprocess.run(
-        psql_command,
-        text=True,
-        env=dict(os.environ, PGPASSWORD=postgres_password),
-        capture_output=True
-    )
-
-    if psql_process.returncode != 0:
-        print("Error executing psql command:")
-        print("psql_process.stderr:", psql_process.stderr)
-        return []
-
-    result_lines = psql_process.stdout.splitlines()
-
-    if not result_lines[-1]:
-        result_lines.pop()
-
-    return result_lines
-
-# Main script
-env = DEV_ENV # Change this based on your needs
-
-# Step 1: List dbt models and output in JSON format
-dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json']
-# dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json', '--exclude', 'dex_trades_beta']
-dbt_output_bytes = subprocess.check_output(dbt_command)
-dbt_output_str = dbt_output_bytes.decode('utf-8')
-dbt_lines = dbt_output_str.splitlines()
-dbt_json_objects = [line for line in dbt_lines if line.strip().startswith('{')]
-dbt_data_list = [json.loads(obj) for obj in dbt_json_objects]
-
-# Iterate through each JSON object and categorize based on 'materialized'
-view_models_dbt = []
-table_models_dbt = []
-
-for data in dbt_data_list:
-    materialized = data.get('config', {}).get('materialized', '').lower()
-    schema = data.get('config', {}).get('schema', 'schema_not_found')
-    alias = data.get('config', {}).get('alias', 'alias_not_found')
-
-    if materialized == 'view':
-        view_models_dbt.append(f"{schema}.{alias}")
-    elif materialized == 'table' or materialized == 'incremental':
-        table_models_dbt.append(f"{schema}.{alias}")
-
-# Generate SQL queries
-tables_sql_query = generate_tables_query(env)
-views_sql_query = generate_views_query(env)
-
-# Run psql queries for tables and views
-psql_tables = run_psql_command(tables_sql_query, env)
-psql_views = run_psql_command(views_sql_query, env)
-
-# Trim whitespace from PostgreSQL tables and views
-psql_tables = [table.strip() for table in psql_tables]
-psql_views = [view.strip() for view in psql_views]
-
-# Compare psql_views vs. view_models_dbt
-print("\nViews in PostgreSQL but not in DBT:")
-for view in psql_views:
-    if view not in view_models_dbt:
-        print(f"DROP VIEW IF EXISTS {view};")
-
-# Compare psql_tables vs. table_models_dbt
-print("\nTables in PostgreSQL but not in DBT:")
-for table in psql_tables:
-    if table not in table_models_dbt:
-        print(f"DROP TABLE IF EXISTS {table};")
From 7beec64787a99937f062d4426f5bd3a1e5ff1bb4 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Tue, 5 Dec 2023 18:34:09 -0500
Subject: [PATCH 6/8] try to add run on PR test

---
 .github/workflows/drop_deprecated_spells.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.github/workflows/drop_deprecated_spells.yml b/.github/workflows/drop_deprecated_spells.yml
index 9711245ffb9..8d48d718ad5 100644
--- a/.github/workflows/drop_deprecated_spells.yml
+++ b/.github/workflows/drop_deprecated_spells.yml
@@ -1,6 +1,18 @@
 # .github/workflows/drop_deprecated_spells.yml
 name: Drop Deprecated Spells
 
+on:
+  {# push:
+    branches:
+      - drop-inactive-spells # Trigger the workflow on pushes to the feature branch #}
+  pull_request:
+    branches:
+      - drop-inactive-spells # Trigger the workflow on pull requests targeting the feature branch
+  schedule:
+    - cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC)
+  workflow_dispatch:
+
+
 on:
   schedule:
     - cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC)
   workflow_dispatch:

From debdeb3cb5e797205d00b20534dba7653dfe6056 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Tue, 5 Dec 2023 18:35:57 -0500
Subject: [PATCH 7/8] try on push to feature branch

---
 .github/workflows/drop_deprecated_spells.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/drop_deprecated_spells.yml b/.github/workflows/drop_deprecated_spells.yml
index 8d48d718ad5..5e0c75dd64b 100644
--- a/.github/workflows/drop_deprecated_spells.yml
+++ b/.github/workflows/drop_deprecated_spells.yml
@@ -2,9 +2,9 @@ name: Drop Deprecated Spells
 
 on:
-  {# push:
+  push:
     branches:
-      - drop-inactive-spells # Trigger the workflow on pushes to the feature branch #}
+      - drop-inactive-spells # Trigger the workflow on pushes to the feature branch
   pull_request:

From 311f20cbaab55486bd520a4ecc5ebb372a3e6826 Mon Sep 17 00:00:00 2001
From: jeff-dude
Date: Tue, 5 Dec 2023 18:37:34 -0500
Subject: [PATCH 8/8] try on all PRs

---
 .github/workflows/drop_deprecated_spells.yml | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/drop_deprecated_spells.yml b/.github/workflows/drop_deprecated_spells.yml
index 5e0c75dd64b..bb4d12bf9c2 100644
--- a/.github/workflows/drop_deprecated_spells.yml
+++ b/.github/workflows/drop_deprecated_spells.yml
@@ -2,12 +2,11 @@ name: Drop Deprecated Spells
 
 on:
-  push:
-    branches:
-      - drop-inactive-spells # Trigger the workflow on pushes to the feature branch
   pull_request:
-    branches:
-      - drop-inactive-spells # Trigger the workflow on pull requests targeting the feature branch
+    paths-ignore:
+      - 'scripts/**'
+      - 'Pipfile'
+      - '.gitignore'
   schedule:
     - cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC)
   workflow_dispatch:
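Note: the three workflow patches above leave the file in an odd state. Patch 6 inserts a second top-level `on:` block while the original one from patch 5 remains below it, and patches 7-8 only edit the new block, so the final file carries two `on:` keys. Duplicate mapping keys are invalid YAML, so the workflow parser is likely to reject the file or keep only one of the blocks. The intended final trigger set would presumably be a single merged block along these lines (sketch, not a committed state):

    on:
      pull_request:
        paths-ignore:
          - 'scripts/**'
          - 'Pipfile'
          - '.gitignore'
      schedule:
        - cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC)
      workflow_dispatch:

Patch 6 also commits Jinja-style `{# ... #}` comment markers into the YAML, which the workflow parser would not accept either; patch 7 removes them. Worth noting as well: with `paths-ignore` covering `scripts/**`, a pull request that touches only the script itself would not trigger this run.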