Skip to content

Commit

Permalink
feat: add environment variables for tag database and schema (#49)
Browse files Browse the repository at this point in the history
* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

* Introduve environment varirables for tags created in common schema

---------

Co-authored-by: Arjun Thangaraj <[email protected]>
  • Loading branch information
arjunthangaraj and Arjun Thangaraj authored Jan 15, 2024
1 parent 3d72c29 commit 33491ea
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 22 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,18 @@ The above means:
The Snowflake table ACCOUNT will have the tag 'accounting_row_string' set to the value 'visible'.
Its columns ACCOUNT_NAME and ACCOUNT_NUMBER will both have the tag 'accounting_col_string' set to the value 'visible'

All tags are created in the schema of the model where they are added. In the above example the tags will end up
All tags are created in the schema of the model where they are added, by default. In the above example the tags will end up
in the FINANCE schema (name depends on how [DBT has been configured](https://docs.getdbt.com/docs/build/custom-schemas)).
If the tag needs to be created in a different location/referred from different location, the below two environment
variables need to be added in dbt_project.yml file as below:

```yml
# dbt_project.yml

common_tag_database: 'audit'
common_tag_schema: 'tags'
```
In the above example, the tags will be created in audit.tags .
The macro must be called as part of on-run-end, so add the following to dbt_project.yml:
```
Expand Down
50 changes: 29 additions & 21 deletions macros/apply_meta_as_tags.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
{%- set tags_by_schema = {} -%}
{% for res in results -%}
{% if snowflake_utils.model_contains_tag_meta(res.node) %}


-- Tagging database and schema will be fetched from the below environment variables.
-- If not given any enviornment variables, model database and schema will be used
{%- set tag_database = var('common_tag_database', res.node.database) -%}
{%- set tag_schema = var('common_tag_schema', res.node.schema) -%}
{%- set tag_schema_full = tag_database+'.'+tag_schema -%}

{%- set model_database = res.node.database -%}
{%- set model_schema = res.node.schema -%}
{%- set model_schema_full = model_database+'.'+model_schema -%}
Expand All @@ -23,18 +29,18 @@
USE SCHEMA {{model_schema}}
{%- endcall -%}

{% if model_schema_full not in tags_by_schema.keys() %}
{{ log('need to fetch tags for schema '+model_schema_full, info=True) }}
{% if tag_schema_full not in tags_by_schema.keys() %}
{{ log('need to fetch tags for schema '+tag_schema_full, info=True) }}
{%- call statement('main', fetch_result=True) -%}
show tags in {{model_database}}.{{model_schema}}
show tags in {{tag_database}}.{{tag_schema}}
{%- endcall -%}
{%- set _ = tags_by_schema.update({model_schema_full: load_result('main')['table'].columns.get('name').values()|list}) -%}
{%- set _ = tags_by_schema.update({tag_schema_full: load_result('main')['table'].columns.get('name').values()|list}) -%}
{{ log('Added tags to cache', info=True) }}
{% else %}
{{ log('already have tag info for schema', info=True) }}
{% endif %}

{%- set current_tags_in_schema = tags_by_schema[model_schema_full] -%}
{%- set current_tags_in_schema = tags_by_schema[tag_schema_full] -%}
{{ log('current_tags_in_schema:', info=True) }}
{{ log(current_tags_in_schema, info=True) }}
{{ log("========== Processing tags for "+model_schema_full+"."+model_alias+" ==========", info=True) }}
Expand Down Expand Up @@ -62,16 +68,18 @@
{{ log(existing_tags_for_table, info=True) }}

{% for table_tag in model_meta.database_tags %}
{{ snowflake_utils.create_tag_if_missing(current_tags_in_schema,table_tag|upper) }}
{% set table_tag_full = tag_schema_full+'.'+table_tag %}
{{ snowflake_utils.create_tag_if_missing(current_tags_in_schema,table_tag_full|upper) }}
{% set desired_tag_value = model_meta.database_tags[table_tag] %}
{{ snowflake_utils.set_table_tag_value_if_different(model_alias|upper,table_tag,desired_tag_value,existing_tags_for_table) }}
{{ snowflake_utils.set_table_tag_value_if_different(model_alias|upper,table_tag_full|upper,desired_tag_value,existing_tags_for_table) }}
{% endfor %}
{% for column in res.node.columns %}
{% for column_tag in res.node.columns[column].meta.database_tags %}
{% set column_tag_full = tag_schema_full+'.'+column_tag %}
{{log(column_tag,info=True)}}
{{ snowflake_utils.create_tag_if_missing(current_tags_in_schema,column_tag|upper)}}
{{ snowflake_utils.create_tag_if_missing(current_tags_in_schema,column_tag_full|upper)}}
{% set desired_tag_value = res.node.columns[column].meta.database_tags[column_tag] %}
{{ snowflake_utils.set_column_tag_value_if_different(model_alias|upper,column|upper,column_tag,desired_tag_value,existing_tags_for_table)}}
{{ snowflake_utils.set_column_tag_value_if_different(model_alias|upper,column|upper,column_tag_full|upper,desired_tag_value,existing_tags_for_table)}}
{% endfor %}
{% endfor %}
{{ log("========== Finished processing tags for "+model_alias+" ==========", info=True) }}
Expand All @@ -82,16 +90,16 @@
{{ return('') }}
{% endmacro %}

{#
-- Given a node in a Result object, returns True if either the model meta contains database_tags,
{#
-- Given a node in a Result object, returns True if either the model meta contains database_tags,
-- or any of the column's meta contains database_tags.
-- Otherwise it returns False
#}
{% macro model_contains_tag_meta(model_node) %}
{% if model_node.meta.database_tags %}
{{ return(True) }}
{% endif %}
{#
{#
-- For compatibility with the old results structure
#}
{% if model_node.config.meta.database_tags %}
Expand All @@ -105,14 +113,14 @@
{{ return(False) }}
{% endmacro %}

{#
{#
-- Snowflake tags must exist before they are used.
-- Given a list of all the existing tags in the account (all_tag_names),
-- Given a list of all the existing tags in the account (all_tag_names),
-- checks if the new tag (new_tag) is already in the list and
-- creates it in Snowflake if it doesn't.
#}
{% macro create_tag_if_missing(all_tag_names,new_tag) %}
{% if new_tag not in all_tag_names %}
{% if new_tag.split('.')[2] not in all_tag_names %}
{{ log('Creating missing tag '+new_tag, info=True) }}
{%- call statement('main', fetch_result=True) -%}
create tag {{new_tag}}
Expand All @@ -125,9 +133,9 @@
{% endmacro %}

-- select LEVEL,OBJECT_NAME,COLUMN_NAME,UPPER(TAG_NAME) as TAG_NAME,TAG_VALUE
{#
{#
-- Updates the value of a Snowflake table tag, if the provided value is different.
-- existing_tags contains the results from querying tag_references_all_columns.
-- existing_tags contains the results from querying tag_references_all_columns.
-- The first column (attribute '0') contains 'TABLE' or 'COLUMN', since we're looking
-- at table tags here then we include only 'TABLE' values.
-- The second column (attribute '1') contains the name of the table, we filter on that.
Expand All @@ -152,9 +160,9 @@
{{ log(load_result('main').data, info=True) }}
{% endif %}
{% endmacro %}
{#
{#
-- Updates the value of a Snowflake column tag, if the provided value is different.
-- existing_tags contains the results from querying tag_references_all_columns.
-- existing_tags contains the results from querying tag_references_all_columns.
-- The first column (attribute '0') contains 'TABLE' or 'COLUMN', since we're looking
-- at column tags here then we include only 'COLUMN' values.
-- The second column (attribute '1') contains the name of the table, we filter on that.
Expand All @@ -177,4 +185,4 @@
{%- endcall -%}
{{ log(load_result('main').data, info=True) }}
{% endif %}
{% endmacro %}
{% endmacro %}

0 comments on commit 33491ea

Please sign in to comment.