From 80fdddd559fad8e451461dbea7094fc40f05f709 Mon Sep 17 00:00:00 2001 From: James Weakley Date: Thu, 10 Nov 2022 08:46:29 +1100 Subject: [PATCH] Add apply_meta_as_tags macro (#21) * Add apply_meta_as_tags macro * Include package name in instructions * Now works correctly when model only contains column tags and not table tags. Improved documentation of internal macros and added clarification to readme. * Uppercase model/column names when comparing, correct comments * Fix typo in readme * Add documentation to macros.yml --- .gitignore | 1 + README.md | 42 +++++++++ dbt_project.yml | 2 +- macros/apply_meta_as_tags.sql | 158 ++++++++++++++++++++++++++++++++++ macros/macros.md | 47 ++++++++++ macros/macros.yml | 6 ++ 6 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 macros/apply_meta_as_tags.sql diff --git a/.gitignore b/.gitignore index dad33a4..bd45744 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ target/ dbt_modules/ +dbt_packages/ logs/ diff --git a/README.md b/README.md index e64ae6e..287a9f9 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,48 @@ Call the macro as an [operation](https://docs.getdbt.com/docs/using-operations): dbt run-operation drop_schema --args "{'schema_name': 'customers_temp', 'database': 'production'}" ``` +### snowflake_utils.apply_meta_as_tags ([source](macros/apply_meta_as_tags.sql)) +This macro applies specific model meta properties as Snowflake tags during on-run-end. This allows you to author Snowflake tags as part of your dbt project. + +#### Arguments +* `results` (required): The [on-run-end context object](https://docs.getdbt.com/reference/dbt-jinja-functions/on-run-end-context). + +#### Usage + +First, configure your dbt model to have the 'database_tags' meta property as shown (tag examples borrowed from [here](https://docs.snowflake.com/en/user-guide/tag-based-masking-policies.html)): + +``` +schema.yml + +models: + - name: ACCOUNT + config: + meta: + database_tags: + accounting_row_string: a + + columns: + - name: ACCOUNT_NAME + meta: + database_tags: + accounting_col_string: b +``` + +The above means: +The Snowflake table ACCOUNT will have the tag 'accounting_row_string' set to the value 'visible'. +Its columns ACCOUNT_NAME and ACCOUNT_NUMBER will both have the tag 'accounting_col_string' set to the value 'visible' + +The macro must be called as part of on-run-end, so add the following to dbt_project.yml: +``` +on-run-end: "{{ snowflake_utils.apply_meta_as_tags(results) }}" +``` + +#### Tag removal +This macro only seeks to add or update the tags which are specified in dbt. It won't delete tags which are not defined. +If you need this behaviour, it usually comes naturally as dbt drops and recreates tables/views for most materializations. +If you are using the incremental materialization, be aware of this limitation. + + ---- ## Contributions diff --git a/dbt_project.yml b/dbt_project.yml index e4917c9..58f7dbb 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'snowflake_utils' -version: '0.2.0' +version: '0.3.0' config-version: 2 diff --git a/macros/apply_meta_as_tags.sql b/macros/apply_meta_as_tags.sql new file mode 100644 index 0000000..fbf3aa6 --- /dev/null +++ b/macros/apply_meta_as_tags.sql @@ -0,0 +1,158 @@ +{% macro apply_meta_as_tags(results) %} + {{ log('apply_meta_as_tags', info=True) }} + {{ log(results) }} + {% if execute %} + {# + -- The tags_by_schema object will act as a local cache of Snowflake tags. + -- This means we only need to call "show tags in " once per schema we process. + #} + {%- set tags_by_schema = {} -%} + {% for res in results -%} + {% if model_contains_tag_meta(res.node) %} + + {%- set model_database = res.node.database -%} + {%- set model_schema = res.node.schema -%} + {%- set model_schema_full = model_database+'.'+model_schema -%} + {%- set model_alias = res.node.alias -%} + + {% if model_schema_full not in tags_by_schema.keys() %} + {{ log('need to fetch tags for schema '+model_schema_full, info=True) }} + {%- call statement('main', fetch_result=True) -%} + show tags in {{model_database}}.{{model_schema}} + {%- endcall -%} + {%- set _ = tags_by_schema.update({model_schema_full: load_result('main')['table'].columns.get('name').values()|list}) -%} + {{ log('Added tags to cache', info=True) }} + {% else %} + {{ log('already have tag info for schema', info=True) }} + {% endif %} + + {%- set current_tags_in_schema = tags_by_schema[model_schema_full] -%} + {{ log('current_tags_in_schema:', info=True) }} + {{ log(current_tags_in_schema, info=True) }} + {{ log("========== Processing tags for "+model_schema_full+"."+model_alias+" ==========", info=True) }} + + {% set line -%} + node: {{ res.node.unique_id }}; status: {{ res.status }} (message: {{ res.message }}) + database tags: {{ res.node.meta.database_tags}} + materialized: {{ res.node.config.materialized }} + {%- endset %} + {{ log(line, info=True) }} + {# + -- Uses the tag_references_all_columns table function to fetch existing tags for the table + #} + {%- call statement('main', fetch_result=True) -%} + select LEVEL,OBJECT_NAME,COLUMN_NAME,UPPER(TAG_NAME) as TAG_NAME,TAG_VALUE from table(information_schema.tag_references_all_columns('{{model_alias}}', 'table')) + {%- endcall -%} + {%- set existing_tags_for_table = load_result('main')['data'] -%} + {{ log('Existing tags for table:', info=True) }} + {{ log(existing_tags_for_table, info=True) }} + + {% for table_tag in res.node.meta.database_tags %} + {{ create_tag_if_missing(current_tags_in_schema,table_tag|upper) }} + {% set desired_tag_value = res.node.meta.database_tags[table_tag] %} + {{set_table_tag_value_if_different(model_alias|upper,table_tag,desired_tag_value,existing_tags_for_table)}} + {% endfor %} + {% for column in res.node.columns %} + {% for column_tag in res.node.columns[column].meta.database_tags %} + {{log(column_tag,info=True)}} + {{create_tag_if_missing(current_tags_in_schema,column_tag|upper)}} + {% set desired_tag_value = res.node.columns[column].meta.database_tags[column_tag] %} + {{set_column_tag_value_if_different(model_alias|upper,column|upper,column_tag,desired_tag_value,existing_tags_for_table)}} + {% endfor %} + {% endfor %} + {{ log("========== Finished processing tags for "+model_alias+" ==========", info=True) }} + {% endif %} + {% endfor %} + {% endif %} +{% endmacro %} + +{# +-- Given a node in a Result object, returns True if either the model meta contains database_tags, +-- or any of the column's meta contains database_tags. +-- Otherwise it returns False +#} +{% macro model_contains_tag_meta(model_node) %} + {% if model_node.meta.database_tags %} + {{ return(True) }} + {% endif %} + {% for column in model_node.columns %} + {% if model_node.columns[column].meta.database_tags %} + {{ return(True) }} + {% endif %} + {% endfor %} + {{ return(False) }} +{% endmacro %} + +{# +-- Snowflake tags must exist before they are used. +-- Given a list of all the existing tags in the account (all_tag_names), +-- checks if the new tag (new_tag) is already in the list and +-- creates it in Snowflake if it doesn't. +#} +{% macro create_tag_if_missing(all_tag_names,new_tag) %} + {% if new_tag not in all_tag_names %} + {{ log('Creating missing tag '+new_tag, info=True) }} + {%- call statement('main', fetch_result=True) -%} + create tag {{new_tag}} + {%- endcall -%} + {{ all_tag_names.append(new_tag)}} + {{ log(load_result('main').data, info=True) }} + {% else %} + {{ log('Tag already exists: '+new_tag, info=True) }} + {% endif %} +{% endmacro %} + +-- select LEVEL,OBJECT_NAME,COLUMN_NAME,UPPER(TAG_NAME) as TAG_NAME,TAG_VALUE +{# +-- Updates the value of a Snowflake table tag, if the provided value is different. +-- existing_tags contains the results from querying tag_references_all_columns. +-- The first column (attribute '0') contains 'TABLE' or 'COLUMN', since we're looking +-- at table tags here then we include only 'TABLE' values. +-- The second column (attribute '1') contains the name of the table, we filter on that. +-- The third column (attribute '2') contains the name of the column, not relevant here. +-- The fourth column (attribute '3') contains the tag name, so we filter on that too. +-- The fifth column (index 4) contains the value of the tag, so we compare with the desired_tag_value +-- to see if we need to update it +#} +{% macro set_table_tag_value_if_different(table_name,tag_name,desired_tag_value,existing_tags) %} + {{ log('Ensuring tag '+tag_name+' has value '+desired_tag_value+' at table level', info=True) }} + {{ log(existing_tags, info=True) }} + {%- set existing_tag_for_table = existing_tags|selectattr('0','equalto','TABLE')|selectattr('1','equalto',table_name|upper)|selectattr('3','equalto',tag_name|upper)|list -%} + {{ log('Filtered tags for table:', info=True) }} + {{ log(existing_tag_for_table, info=True) }} + {% if existing_tag_for_table|length > 0 and existing_tag_for_table[0][4]==desired_tag_value %} + {{ log('Correct tag value already exists', info=True) }} + {% else %} + {{ log('Setting tag value for '+tag_name+' to value '+desired_tag_value, info=True) }} + {%- call statement('main', fetch_result=True) -%} + alter table {{table_name}} set tag {{tag_name}} = '{{desired_tag_value}}' + {%- endcall -%} + {{ log(load_result('main').data, info=True) }} + {% endif %} +{% endmacro %} +{# +-- Updates the value of a Snowflake column tag, if the provided value is different. +-- existing_tags contains the results from querying tag_references_all_columns. +-- The first column (attribute '0') contains 'TABLE' or 'COLUMN', since we're looking +-- at column tags here then we include only 'COLUMN' values. +-- The second column (attribute '1') contains the name of the table, we filter on that. +-- The third column (attribute '2') contains the name of the column, we filter on that. +-- The fourth column (attribute '3') contains the tag name, so we filter on that too. +-- The fifth column (index 4) contains the value of the tag, so we compare with the desired_tag_value +-- to see if we need to update it +#} +{% macro set_column_tag_value_if_different(table_name,column_name,tag_name,desired_tag_value,existing_tags) %} + {{ log('Ensuring tag '+tag_name+' has value '+desired_tag_value+' at column level', info=True) }} + {%- set existing_tag_for_column = existing_tags|selectattr('0','equalto','COLUMN')|selectattr('1','equalto',table_name|upper)|selectattr('2','equalto',column_name|upper)|selectattr('3','equalto',tag_name|upper)|list -%} + {{ log('Filtered tags for column:', info=True) }} + {{ log(existing_tag_for_column, info=True) }} + {% if existing_tag_for_column|length > 0 and existing_tag_for_column[0][4]==desired_tag_value %} + {{ log('Correct tag value already exists', info=True) }} + {% else %} + {{ log('Setting tag value for '+tag_name+' to value '+desired_tag_value, info=True) }} + {%- call statement('main', fetch_result=True) -%} + alter table {{table_name}} modify column {{column_name}} set tag {{tag_name}} = '{{desired_tag_value}}' + {%- endcall -%} + {{ log(load_result('main').data, info=True) }} + {% endif %} +{% endmacro %} \ No newline at end of file diff --git a/macros/macros.md b/macros/macros.md index b3ee9c7..a44a4e7 100644 --- a/macros/macros.md +++ b/macros/macros.md @@ -77,3 +77,50 @@ When a variable is configured for a conditon _and_ that condition is matched whe ``` {% enddocs %} +{% docs apply_meta_as_tags %} +This macro applies specific model meta properties as Snowflake tags during on-run-end. This allows you to author Snowflake tags as part of your dbt project. + +This macro applies specific model meta properties as Snowflake tags during on-run-end. This allows you to author Snowflake tags as part of your dbt project. + +#### Arguments +* `results` (required): The [on-run-end context object](https://docs.getdbt.com/reference/dbt-jinja-functions/on-run-end-context). + +#### Usage + +First, configure your dbt model to have the 'database_tags' meta property as shown (tag examples borrowed from [here](https://docs.snowflake.com/en/user-guide/tag-based-masking-policies.html)): +{% raw %} +``` +schema.yml + +models: + - name: ACCOUNT + config: + meta: + database_tags: + accounting_row_string: a + + columns: + - name: ACCOUNT_NAME + meta: + database_tags: + accounting_col_string: b +``` +{% endraw %} + +The above means: +The Snowflake table ACCOUNT will have the tag 'accounting_row_string' set to the value 'visible'. +Its columns ACCOUNT_NAME and ACCOUNT_NUMBER will both have the tag 'accounting_col_string' set to the value 'visible' + +The macro must be called as part of on-run-end, so add the following to dbt_project.yml: +{% raw %} +``` +on-run-end: "{{ snowflake_utils.apply_meta_as_tags(results) }}" +``` +{% endraw %} + +#### Tag removal +This macro only seeks to add or update the tags which are specified in dbt. It won't delete tags which are not defined. +If you need this behaviour, it usually comes naturally as dbt drops and recreates tables/views for most materializations. +If you are using the incremental materialization, be aware of this limitation. + +{% enddocs %} \ No newline at end of file diff --git a/macros/macros.yml b/macros/macros.yml index 7b340fe..9434d8e 100644 --- a/macros/macros.yml +++ b/macros/macros.yml @@ -23,3 +23,9 @@ macros: - name: warehouse_size description: '{{ doc("warehouse_size") }}' + + - name: apply_meta_as_tags + description: '{{ doc("apply_meta_as_tags") }}' + arguments: + - name: results + description: The on-run-end context object \ No newline at end of file