From 191ec39b56ad8af5b4c263436fe119ba908696f4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 10 Oct 2023 12:53:28 -0700 Subject: [PATCH 01/21] add topological sort --- .../src/datahub/utilities/topological_sort.py | 49 +++++++++++++++++++ .../tests/unit/test_topological_sort.py | 33 +++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 metadata-ingestion/src/datahub/utilities/topological_sort.py create mode 100644 metadata-ingestion/tests/unit/test_topological_sort.py diff --git a/metadata-ingestion/src/datahub/utilities/topological_sort.py b/metadata-ingestion/src/datahub/utilities/topological_sort.py new file mode 100644 index 00000000000000..f807dfe96063a7 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/topological_sort.py @@ -0,0 +1,49 @@ +from collections import deque +from typing import Dict, Iterable, List, Tuple, TypeVar + +_K = TypeVar("_K") + + +def topological_sort(nodes: List[_K], edges: List[Tuple[_K, _K]]) -> Iterable[_K]: + """Topological sort of a directed acyclic graph or forest. + + This is an implementation of Kahn's algorithm. + + Args: + nodes: List of nodes. + edges: List of edges, as tuples of (source, target). + + Returns: + List of nodes in topological order. + """ + + # Build adjacency list. + adj_list: Dict[_K, List[_K]] = {node: [] for node in nodes} + for source, target in edges: + adj_list[source].append(target) + + # Build in-degree map. + in_degrees: Dict[_K, int] = {node: 0 for node in nodes} + for _source, target in edges: + in_degrees[target] += 1 + + # Initialize queue with nodes with in-degree 0. + queue = deque(node for node in nodes if in_degrees[node] == 0) + + results = 0 + while queue: + node = queue.popleft() + + results += 1 + yield node + + # Decrement in-degree of each neighbor. + for neighbor in adj_list[node]: + in_degrees[neighbor] -= 1 + + # If in-degree is 0, add to queue. + if in_degrees[neighbor] == 0: + queue.append(neighbor) + + if results != len(nodes): + raise ValueError("Graph contains cycles.") diff --git a/metadata-ingestion/tests/unit/test_topological_sort.py b/metadata-ingestion/tests/unit/test_topological_sort.py new file mode 100644 index 00000000000000..4300816b6c48fc --- /dev/null +++ b/metadata-ingestion/tests/unit/test_topological_sort.py @@ -0,0 +1,33 @@ +import pytest + +from datahub.utilities.topological_sort import topological_sort + + +def test_topological_sort_valid(): + nodes = ["a", "b", "c", "d", "e", "f"] + edges = [ + ("a", "d"), + ("f", "b"), + ("b", "d"), + ("f", "a"), + ("d", "c"), + ] + + # This isn't the only valid topological sort order. + expected_order = ["e", "f", "b", "a", "d", "c"] + assert list(topological_sort(nodes, edges)) == expected_order + + +def test_topological_sort_invalid(): + nodes = ["a", "b", "c", "d", "e", "f"] + edges = [ + ("a", "d"), + ("f", "b"), + ("b", "d"), + ("f", "a"), + ("d", "c"), + ("c", "f"), + ] + + with pytest.raises(ValueError, match="cycle"): + list(topological_sort(nodes, edges)) From a5717143b22be025fc106e6d339b6a296df12803 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 9 Oct 2023 13:23:34 -0700 Subject: [PATCH 02/21] start adding schema inference --- metadata-ingestion/setup.py | 4 ++-- .../src/datahub/ingestion/source/dbt/dbt_common.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 0b8661b0df5f5a..633cce2ddac1fa 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -299,8 +299,8 @@ "datahub-lineage-file": set(), "datahub-business-glossary": set(), "delta-lake": {*data_lake_profiling, *delta_lake}, - "dbt": {"requests"} | aws_common, - "dbt-cloud": {"requests"}, + "dbt": {"requests"} | sqlglot_lib | aws_common, + "dbt-cloud": {"requests"} | sqlglot_lib, "druid": sql_common | {"pydruid>=0.6.2"}, "dynamodb": aws_common, # Starting with 7.14.0 python client is checking if it is connected to elasticsearch client. If its not it throws diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 48d2118a9b0917..fae7450b5ed0a6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -280,6 +280,10 @@ class DBTCommonConfig( default=False, description="When enabled, dbt test warnings will be treated as failures.", ) + infer_dbt_schemas: bool = Field( + default=True, + description="When enabled, schemas will be inferred from the dbt node definition.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: @@ -548,7 +552,6 @@ def get_column_type( @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") -@capability(SourceCapability.USAGE_STATS, "", supported=False) class DBTSourceBase(StatefulIngestionSourceBase): def __init__(self, config: DBTCommonConfig, ctx: PipelineContext, platform: str): super().__init__(config, ctx) @@ -982,6 +985,8 @@ def get_schema_metadata( self.config.strip_user_ids_from_email, ) + # TODO if infer_dbt_schemas, load from saved schemas too + canonical_schema: List[SchemaField] = [] for column in node.columns: description = None From fe33bb3cff58d54643ef3d28ec4464feaa73c234 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 10 Oct 2023 19:47:18 -0700 Subject: [PATCH 03/21] start adding dbt schema inference --- .../ingestion/source/dbt/dbt_common.py | 147 ++++++++++++++++-- .../tests/integration/dbt/test_dbt.py | 12 +- 2 files changed, 145 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index fae7450b5ed0a6..a9ee0af3190728 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1,3 +1,4 @@ +import itertools import logging import re from abc import abstractmethod @@ -102,7 +103,9 @@ ) from datahub.specific.dataset import DatasetPatchBuilder from datahub.utilities.mapping import Constants, OperationProcessor +from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage from datahub.utilities.time import datetime_to_ts_millis +from datahub.utilities.topological_sort import topological_sort logger = logging.getLogger(__name__) DBT_PLATFORM = "dbt" @@ -351,6 +354,8 @@ class DBTColumn: meta: Dict[str, Any] = field(default_factory=dict) tags: List[str] = field(default_factory=list) + datahub_data_type: Optional[SchemaFieldDataType] = None + @dataclass class DBTNode: @@ -416,6 +421,34 @@ def get_urn( env=env, ) + @property + def exists_in_target_platform(self): + return not (self.materialization == "ephemeral" or self.node_type == "test") + + def merge_schema_fields(self, schema_fields: List[SchemaField]) -> None: + """ + Merges the schema fields into the DBTNode. + + This will prefer the dbt columns info over the schema metadata info. + """ + + if self.columns: + # If we already have columns, don't overwrite them. + # TODO maybe we should augment them instead? + return + + self.columns = [ + DBTColumn( + name=schema_field.fieldPath, + comment="", + description="", + index=i, + data_type=schema_field.nativeDataType, + datahub_data_type=schema_field.type, + ) + for i, schema_field in enumerate(schema_fields) + ] + def get_custom_properties(node: DBTNode) -> Dict[str, str]: # initialize custom properties to node's meta props @@ -649,23 +682,23 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - if self.config.write_semantics == "PATCH" and not self.ctx.graph: - raise ConfigurationError( - "With PATCH semantics, dbt source requires a datahub_api to connect to. " - "Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe." - ) + if self.config.write_semantics == "PATCH": + self.ctx.require_graph("Using dbt with write_semantics=PATCH") + if self.config.infer_dbt_schemas: + self.ctx.require_graph("Using dbt with infer_dbt_schemas=True") all_nodes, additional_custom_props = self.load_nodes() all_nodes_map = {node.dbt_name: node for node in all_nodes} - nodes = self.filter_nodes(all_nodes) - additional_custom_props_filtered = { key: value for key, value in additional_custom_props.items() if value is not None } + inferred_schemas = self._infer_schemas_and_update_cll(all_nodes_map) + + nodes = self._filter_nodes(all_nodes) non_test_nodes = [ dataset_node for dataset_node in nodes if dataset_node.node_type != "test" ] @@ -693,7 +726,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: all_nodes_map, ) - def filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: + def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: nodes = [] for node in all_nodes: key = node.dbt_name @@ -705,6 +738,99 @@ def filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: return nodes + def _infer_schemas_and_update_cll( + self, all_nodes_map: Dict[str, DBTNode] + ) -> Dict[str, SchemaMetadata]: + if not self.config.infer_dbt_schemas: + return {} + + graph = self.ctx.require_graph("Using dbt with infer_dbt_schemas") + + # TODO: maybe write these to the nodes directly? + schemas: Dict[str, SchemaMetadata] = {} + schema_resolver = SchemaResolver( + platform=self.config.target_platform, + platform_instance=self.config.target_platform_instance, + env=self.config.env, + ) + + # TODO maintain bidict of dbt node name <-> target platform urn + + # TODO: only process non-test nodes here + + # Iterate over the dbt nodes in topological order. + # This ensures that we process upstream nodes before downstream nodes. + for dbt_name in topological_sort( + list(all_nodes_map.keys()), + edges=list( + itertools.chain.from_iterable( + [(upstream, node.dbt_name) for upstream in node.upstream_nodes] + for node in all_nodes_map.values() + ) + ), + ): + node = all_nodes_map[dbt_name] + + dbt_node_urn = node.get_urn( + DBT_PLATFORM, self.config.env, self.config.platform_instance + ) + if node.exists_in_target_platform: + target_node_urn = node.get_urn( + self.config.target_platform, + self.config.env, + self.config.target_platform_instance, + ) + else: + target_node_urn = None + + # Fetch the schema from the graph if possible. + schema_fields: Optional[List[SchemaField]] = None + if target_node_urn: + schema_metadata = graph.get_aspect(target_node_urn, SchemaMetadata) + if schema_metadata: + schema_fields = schema_metadata.fields + + # Run sql parser to infer the schema + generate column lineage. + sql_result = None + if node.compiled_code: + sql_result = sqlglot_lineage( + node.compiled_code, schema_resolver=schema_resolver + ) + + # TODO when saving the lineage info, drop transitive deps + + # If we didn't fetch the schema from the graph, use the inferred schema. + if not schema_fields and sql_result and sql_result.column_lineage: + schema_fields = [ + SchemaField( + fieldPath=column_lineage.downstream.column, + type=column_lineage.downstream.column_type + or SchemaFieldDataType(type=NullTypeClass()), + nativeDataType="", + ) + for column_lineage in sql_result.column_lineage + ] + + # Merge the schema fields into the dbt node. + if schema_fields: + node.merge_schema_fields(schema_fields) + + # Add the node to the schema resolver, so that CLL works for + # downstream nodes. + if target_node_urn and node.columns: + schema_resolver.add_raw_schema_info( + target_node_urn, + { + # TODO: This raw type logic is a bit hacky. + column.name: str(column.datahub_data_type.type) + if column.datahub_data_type + else column.data_type + for column in node.columns + }, + ) + + return schemas + def create_platform_mces( self, dbt_nodes: List[DBTNode], @@ -784,7 +910,7 @@ def create_platform_mces( else: # We are creating empty node for platform and only add lineage/keyaspect. aspects = [] - if node.materialization == "ephemeral" or node.node_type == "test": + if not node.exists_in_target_platform: continue # This code block is run when we are generating entities of platform type. @@ -1034,7 +1160,8 @@ def get_schema_metadata( field = SchemaField( fieldPath=field_name, nativeDataType=column.data_type, - type=get_column_type( + type=column.datahub_data_type + or get_column_type( report, node.dbt_name, column.data_type, node.dbt_adapter ), description=description, diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index a970ff6a5de7ad..abf5959b1f4ec7 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -57,6 +57,7 @@ def set_paths( "target_platform": self.target_platform, "enable_meta_mapping": False, "write_semantics": "OVERRIDE", + "infer_dbt_schemas": False, "meta_mapping": { "owner": { "match": "^@(.*)", @@ -259,6 +260,7 @@ def test_dbt_tests(pytestconfig, tmp_path, mock_time, **kwargs): ), # this is just here to avoid needing to access datahub server write_semantics="OVERRIDE", + infer_dbt_schemas=False, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -361,11 +363,12 @@ def test_dbt_tests_only_assertions(pytestconfig, tmp_path, mock_time, **kwargs): test_results_path=str( (test_resources_dir / "jaffle_shop_test_results.json").resolve() ), - # this is just here to avoid needing to access datahub server - write_semantics="OVERRIDE", entities_enabled=DBTEntitiesEnabled( test_results=EmitDirective.ONLY ), + # this is just here to avoid needing to access datahub server + write_semantics="OVERRIDE", + infer_dbt_schemas=False, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -440,13 +443,14 @@ def test_dbt_only_test_definitions_and_results( test_results_path=str( (test_resources_dir / "jaffle_shop_test_results.json").resolve() ), - # this is just here to avoid needing to access datahub server - write_semantics="OVERRIDE", entities_enabled=DBTEntitiesEnabled( sources=EmitDirective.NO, seeds=EmitDirective.NO, models=EmitDirective.NO, ), + # this is just here to avoid needing to access datahub server + write_semantics="OVERRIDE", + infer_dbt_schemas=False, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), From 1c811efabe9be6f8410775d71f26d6f37866acc2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 11 Oct 2023 14:46:43 -0700 Subject: [PATCH 04/21] add mvp support for dbt cll --- .../airflow-plugin/setup.py | 2 +- .../ingestion/source/dbt/dbt_common.py | 164 ++++++++++++++---- 2 files changed, 132 insertions(+), 34 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index a5af881022d8c9..44f2e574e58928 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -14,7 +14,7 @@ def get_long_description(): return pathlib.Path(os.path.join(root, "README.md")).read_text() -_version = package_metadata["__version__"] +_version: str = package_metadata["__version__"] _self_pin = f"=={_version}" if not _version.endswith("dev0") else "" diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index a9ee0af3190728..de14ed3cfbbb2d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -15,7 +15,6 @@ AllowDenyPattern, ConfigEnum, ConfigModel, - ConfigurationError, LineageConfig, ) from datahub.configuration.source_common import DatasetSourceConfigMixin @@ -68,6 +67,9 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageTypeClass, + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, UpstreamClass, UpstreamLineage, ) @@ -287,6 +289,10 @@ class DBTCommonConfig( default=True, description="When enabled, schemas will be inferred from the dbt node definition.", ) + include_column_lineage: bool = Field( + default=False, + description="When enabled, column-level lineage will be extracted from the dbt node definition.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: @@ -357,6 +363,14 @@ class DBTColumn: datahub_data_type: Optional[SchemaFieldDataType] = None +@dataclass +class DBTColumnLineageInfo: + upstream_dbt_name: str + + upstream_col: str + downstream_col: str + + @dataclass class DBTNode: """ @@ -387,7 +401,8 @@ class DBTNode: owner: Optional[str] columns: List[DBTColumn] = field(default_factory=list) - upstream_nodes: List[str] = field(default_factory=list) + upstream_nodes: List[str] = field(default_factory=list) # list of upstream dbt_name + upstream_cll: List[DBTColumnLineageInfo] = field(default_factory=list) meta: Dict[str, Any] = field(default_factory=dict) query_tag: Dict[str, Any] = field(default_factory=dict) @@ -421,6 +436,37 @@ def get_urn( env=env, ) + def get_urn_for_upstream_lineage( + self, + dbt_platform_instance: Optional[str], + target_platform: str, + target_platform_instance: Optional[str], + env: str, + ) -> str: + """ + Get the urn to use when referencing this node in a dbt node's upstream lineage. + + If the node is a source or an ephemeral dbt node, we should point at the dbt node. + Otherwise, the node is materialized in the target platform, and so lineage should + point there. + """ + # TODO: This logic shouldn't live in the DBTNode class. It should be moved to the source. + + platform_value = DBT_PLATFORM + platform_instance_value = dbt_platform_instance + + materialized = self.materialization + if materialized in {"view", "table", "incremental", "snapshot"}: + # upstream urns point to the target platform + platform_value = target_platform + platform_instance_value = target_platform_instance + + return self.get_urn( + target_platform=platform_value, + env=env, + data_platform_instance=platform_instance_value, + ) + @property def exists_in_target_platform(self): return not (self.materialization == "ephemeral" or self.node_type == "test") @@ -494,21 +540,12 @@ def get_upstreams( upstream_manifest_node = all_nodes[upstream] # This logic creates lineages among dbt nodes. - platform_value = DBT_PLATFORM - platform_instance_value = platform_instance - - materialized = upstream_manifest_node.materialization - - if materialized in {"view", "table", "incremental", "snapshot"}: - # upstream urns point to the target platform - platform_value = target_platform - platform_instance_value = target_platform_instance - upstream_urns.append( - upstream_manifest_node.get_urn( - platform_value, - environment, - platform_instance_value, + upstream_manifest_node.get_urn_for_upstream_lineage( + dbt_platform_instance=platform_instance, + target_platform=target_platform, + target_platform_instance=target_platform_instance, + env=environment, ) ) return upstream_urns @@ -645,9 +682,10 @@ def create_test_entity_mcps( target_platform=self.config.target_platform, target_platform_instance=self.config.target_platform_instance, environment=self.config.env, - platform_instance=None, + platform_instance=self.config.platform_instance, ) + # In case a dbt test depends on multiple tables, we create separate assertions for each. for upstream_urn in sorted(upstream_urns): if self.config.entities_enabled.can_emit_node_type("test"): yield make_assertion_from_test( @@ -754,7 +792,7 @@ def _infer_schemas_and_update_cll( env=self.config.env, ) - # TODO maintain bidict of dbt node name <-> target platform urn + target_platform_urn_to_dbt_name: Dict[str, str] = {} # TODO: only process non-test nodes here @@ -780,6 +818,8 @@ def _infer_schemas_and_update_cll( self.config.env, self.config.target_platform_instance, ) + + target_platform_urn_to_dbt_name[target_node_urn] = node.dbt_name else: target_node_urn = None @@ -797,7 +837,24 @@ def _infer_schemas_and_update_cll( node.compiled_code, schema_resolver=schema_resolver ) - # TODO when saving the lineage info, drop transitive deps + # Save the column lineage. + # TODO add cte stops based on the upstreams + if ( + self.config.include_column_lineage + and sql_result + and sql_result.column_lineage + ): + node.upstream_cll = [ + DBTColumnLineageInfo( + upstream_dbt_name=target_platform_urn_to_dbt_name[ + upstream_column.table + ], + upstream_col=upstream_column.column, + downstream_col=column_lineage_info.downstream.column, + ) + for column_lineage_info in sql_result.column_lineage + for upstream_column in column_lineage_info.upstreams + ] # If we didn't fetch the schema from the graph, use the inferred schema. if not schema_fields and sql_result and sql_result.column_lineage: @@ -806,7 +863,8 @@ def _infer_schemas_and_update_cll( fieldPath=column_lineage.downstream.column, type=column_lineage.downstream.column_type or SchemaFieldDataType(type=NullTypeClass()), - nativeDataType="", + nativeDataType=column_lineage.downstream.native_column_type + or "", ) for column_lineage in sql_result.column_lineage ] @@ -821,10 +879,7 @@ def _infer_schemas_and_update_cll( schema_resolver.add_raw_schema_info( target_node_urn, { - # TODO: This raw type logic is a bit hacky. - column.name: str(column.datahub_data_type.type) - if column.datahub_data_type - else column.data_type + column.name: column.data_type or "unknown" for column in node.columns }, ) @@ -1267,27 +1322,70 @@ def _create_lineage_aspect_for_dbt_node( """ This method creates lineage amongst dbt nodes. A dbt node can be linked to other dbt nodes or a platform node. """ - upstream_urns = get_upstreams( - node.upstream_nodes, - all_nodes_map, - self.config.target_platform, - self.config.target_platform_instance, - self.config.env, - self.config.platform_instance, - ) # if a node is of type source in dbt, its upstream lineage should have the corresponding table/view # from the platform. This code block is executed when we are generating entities of type "dbt". if node.node_type == "source": - upstream_urns.append( + upstream_urns = [ node.get_urn( self.config.target_platform, self.config.env, self.config.target_platform_instance, ) + ] + cll = None + else: + upstream_urns = get_upstreams( + node.upstream_nodes, + all_nodes_map, + self.config.target_platform, + self.config.target_platform_instance, + self.config.env, + self.config.platform_instance, + ) + + node_urn = node.get_urn( + target_platform=DBT_PLATFORM, + env=self.config.env, + data_platform_instance=self.config.platform_instance, ) + + def _translate_dbt_name_to_upstream_urn(dbt_name: str) -> str: + return all_nodes_map[dbt_name].get_urn_for_upstream_lineage( + dbt_platform_instance=self.config.platform_instance, + target_platform=self.config.target_platform, + target_platform_instance=self.config.target_platform_instance, + env=self.config.env, + ) + + cll = [ + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + downstreamType=FineGrainedLineageDownstreamType.FIELD_SET, + upstreams=[ + mce_builder.make_schema_field_urn( + _translate_dbt_name_to_upstream_urn( + upstream_column.upstream_dbt_name + ), + upstream_column.upstream_col, + ) + for upstream_column in upstreams + ], + downstreams=[ + mce_builder.make_schema_field_urn(node_urn, downstream) + ], + ) + for downstream, upstreams in itertools.groupby( + node.upstream_cll, lambda x: x.downstream_col + ) + ] + if upstream_urns: upstreams_lineage_class = get_upstream_lineage(upstream_urns) + + if self.config.include_column_lineage and cll: + upstreams_lineage_class.fineGrainedLineages = cll + return upstreams_lineage_class return None From dbcad42726049a978496b274fa1ea7d85a30fcbe Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 11 Oct 2023 15:49:11 -0700 Subject: [PATCH 05/21] code cleanup --- .../ingestion/source/dbt/dbt_common.py | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index de14ed3cfbbb2d..232ba1347f90d8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -15,6 +15,7 @@ AllowDenyPattern, ConfigEnum, ConfigModel, + ConfigurationError, LineageConfig, ) from datahub.configuration.source_common import DatasetSourceConfigMixin @@ -480,7 +481,6 @@ def merge_schema_fields(self, schema_fields: List[SchemaField]) -> None: if self.columns: # If we already have columns, don't overwrite them. - # TODO maybe we should augment them instead? return self.columns = [ @@ -734,7 +734,10 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if value is not None } - inferred_schemas = self._infer_schemas_and_update_cll(all_nodes_map) + # We need to run this before filtering nodes, because the info generated + # for a filtered node may be used by an unfiltered node. + # NOTE: This method mutates the DBTNode objects directly. + self._infer_schemas_and_update_cll(all_nodes_map) nodes = self._filter_nodes(all_nodes) non_test_nodes = [ @@ -776,16 +779,16 @@ def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: return nodes - def _infer_schemas_and_update_cll( - self, all_nodes_map: Dict[str, DBTNode] - ) -> Dict[str, SchemaMetadata]: + def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> None: if not self.config.infer_dbt_schemas: - return {} + if self.config.include_column_lineage: + raise ConfigurationError( + "`infer_dbt_schemas` must be enabled to use `include_column_lineage`" + ) + return graph = self.ctx.require_graph("Using dbt with infer_dbt_schemas") - # TODO: maybe write these to the nodes directly? - schemas: Dict[str, SchemaMetadata] = {} schema_resolver = SchemaResolver( platform=self.config.target_platform, platform_instance=self.config.target_platform_instance, @@ -794,8 +797,6 @@ def _infer_schemas_and_update_cll( target_platform_urn_to_dbt_name: Dict[str, str] = {} - # TODO: only process non-test nodes here - # Iterate over the dbt nodes in topological order. # This ensures that we process upstream nodes before downstream nodes. for dbt_name in topological_sort( @@ -809,9 +810,6 @@ def _infer_schemas_and_update_cll( ): node = all_nodes_map[dbt_name] - dbt_node_urn = node.get_urn( - DBT_PLATFORM, self.config.env, self.config.platform_instance - ) if node.exists_in_target_platform: target_node_urn = node.get_urn( self.config.target_platform, @@ -833,12 +831,14 @@ def _infer_schemas_and_update_cll( # Run sql parser to infer the schema + generate column lineage. sql_result = None if node.compiled_code: + # TODO: Add CTE stops based on the upstreams list. The code as currently + # written will generate lineage to the upstreams of ephemeral nodes instead + # of the ephemeral node itself. sql_result = sqlglot_lineage( node.compiled_code, schema_resolver=schema_resolver ) # Save the column lineage. - # TODO add cte stops based on the upstreams if ( self.config.include_column_lineage and sql_result @@ -884,8 +884,6 @@ def _infer_schemas_and_update_cll( }, ) - return schemas - def create_platform_mces( self, dbt_nodes: List[DBTNode], From 3410d29cc7f0d692cb05a52bd2929ad886250976 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 11 Oct 2023 15:57:20 -0700 Subject: [PATCH 06/21] other tweaks --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 232ba1347f90d8..d50070436730ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -287,7 +287,7 @@ class DBTCommonConfig( description="When enabled, dbt test warnings will be treated as failures.", ) infer_dbt_schemas: bool = Field( - default=True, + default=False, description="When enabled, schemas will be inferred from the dbt node definition.", ) include_column_lineage: bool = Field( @@ -622,6 +622,7 @@ def get_column_type( @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability(SourceCapability.LINEAGE_FINE, "Enabled using `include_column_lineage`") class DBTSourceBase(StatefulIngestionSourceBase): def __init__(self, config: DBTCommonConfig, ctx: PipelineContext, platform: str): super().__init__(config, ctx) From 01278fed122a7eff93804c86887f2fe7ec183df4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 13 Oct 2023 11:58:48 -0700 Subject: [PATCH 07/21] fix keyerror --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index d50070436730ff..f0be719e3aa61f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -855,6 +855,10 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No ) for column_lineage_info in sql_result.column_lineage for upstream_column in column_lineage_info.upstreams + # Only include the CLL if the table in in the upstream list. + if target_platform_urn_to_dbt_name.get(upstream_column.table) + and target_platform_urn_to_dbt_name[upstream_column.table] + in node.upstream_nodes ] # If we didn't fetch the schema from the graph, use the inferred schema. From c3527669b196d718bf652e2e9afae43ff3a97626 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Oct 2023 23:50:45 -0700 Subject: [PATCH 08/21] start fix --- .../ingestion/source/dbt/dbt_common.py | 124 +++++++++++++----- .../src/datahub/utilities/sqlglot_lineage.py | 68 +++++++--- .../unit/sql_parsing/test_sqlglot_lineage.py | 38 +++++- 3 files changed, 182 insertions(+), 48 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index f0be719e3aa61f..868b13f06d73d0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -106,7 +106,13 @@ ) from datahub.specific.dataset import DatasetPatchBuilder from datahub.utilities.mapping import Constants, OperationProcessor -from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage +from datahub.utilities.sqlglot_lineage import ( + SchemaResolver, + SqlParsingDebugInfo, + SqlParsingResult, + detach_ctes, + sqlglot_lineage, +) from datahub.utilities.time import datetime_to_ts_millis from datahub.utilities.topological_sort import topological_sort @@ -404,6 +410,7 @@ class DBTNode: columns: List[DBTColumn] = field(default_factory=list) upstream_nodes: List[str] = field(default_factory=list) # list of upstream dbt_name upstream_cll: List[DBTColumnLineageInfo] = field(default_factory=list) + cll_debug_info: Optional[SqlParsingDebugInfo] = None meta: Dict[str, Any] = field(default_factory=dict) query_tag: Dict[str, Any] = field(default_factory=dict) @@ -425,6 +432,8 @@ def get_urn( self, target_platform: str, env: str, + # If target_platform = dbt, this is the dbt platform instance. + # Otherwise, it's the target platform instance. data_platform_instance: Optional[str], ) -> str: db_fqn = self.get_db_fqn() @@ -437,6 +446,22 @@ def get_urn( env=env, ) + def get_urn_fake_ephemeral( + self, + target_platform: str, + env: str, + target_platform_instance: Optional[str], + ) -> str: + assert self.materialization == "ephemeral" + db_fqn = f"__datahub__dbt__ephemeral__{self.name}" + + return mce_builder.make_dataset_urn_with_platform_instance( + platform=target_platform, + name=db_fqn, + platform_instance=target_platform_instance, + env=env, + ) + def get_urn_for_upstream_lineage( self, dbt_platform_instance: Optional[str], @@ -811,20 +836,29 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No ): node = all_nodes_map[dbt_name] + target_node_urn = None + should_fetch_target_node_schema = False if node.exists_in_target_platform: target_node_urn = node.get_urn( self.config.target_platform, self.config.env, self.config.target_platform_instance, ) - + should_fetch_target_node_schema = True + elif node.node_type == "ephemeral": + # For ephemeral nodes, we "pretend" that they exist in the target platform + # for schema resolution purposes. + target_node_urn = node.get_urn_fake_ephemeral( + target_platform=self.config.target_platform, + target_platform_instance=self.config.target_platform_instance, + env=self.config.env, + ) + if target_node_urn: target_platform_urn_to_dbt_name[target_node_urn] = node.dbt_name - else: - target_node_urn = None # Fetch the schema from the graph if possible. schema_fields: Optional[List[SchemaField]] = None - if target_node_urn: + if target_node_urn and should_fetch_target_node_schema: schema_metadata = graph.get_aspect(target_node_urn, SchemaMetadata) if schema_metadata: schema_fields = schema_metadata.fields @@ -832,34 +866,54 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No # Run sql parser to infer the schema + generate column lineage. sql_result = None if node.compiled_code: - # TODO: Add CTE stops based on the upstreams list. The code as currently - # written will generate lineage to the upstreams of ephemeral nodes instead - # of the ephemeral node itself. - sql_result = sqlglot_lineage( - node.compiled_code, schema_resolver=schema_resolver - ) + try: + # Add CTE stops based on the upstreams list. + preprocessed_sql = detach_ctes( + node.compiled_code, + platform=schema_resolver.platform, + cte_mapping={ + f"__dbt__cte__{upstream_node.name}": upstream_node.get_urn_fake_ephemeral( + target_platform=self.config.target_platform, + target_platform_instance=self.config.target_platform_instance, + env=self.config.env, + ) + for upstream_node in [ + all_nodes_map[upstream_node_name] + for upstream_node_name in node.upstream_nodes + if upstream_node_name in all_nodes_map + ] + if upstream_node.materialization == "ephemeral" + }, + ) + except Exception as e: + sql_result = SqlParsingResult.make_from_error(e) + else: + sql_result = sqlglot_lineage( + preprocessed_sql, schema_resolver=schema_resolver + ) # Save the column lineage. - if ( - self.config.include_column_lineage - and sql_result - and sql_result.column_lineage - ): - node.upstream_cll = [ - DBTColumnLineageInfo( - upstream_dbt_name=target_platform_urn_to_dbt_name[ - upstream_column.table - ], - upstream_col=upstream_column.column, - downstream_col=column_lineage_info.downstream.column, - ) - for column_lineage_info in sql_result.column_lineage - for upstream_column in column_lineage_info.upstreams - # Only include the CLL if the table in in the upstream list. - if target_platform_urn_to_dbt_name.get(upstream_column.table) - and target_platform_urn_to_dbt_name[upstream_column.table] - in node.upstream_nodes - ] + if self.config.include_column_lineage and sql_result: + # We only save the debug info here. We're report errors based on it later, after + # applying the configured node filters. + node.cll_debug_info = sql_result.debug_info + + if sql_result.column_lineage: + node.upstream_cll = [ + DBTColumnLineageInfo( + upstream_dbt_name=target_platform_urn_to_dbt_name[ + upstream_column.table + ], + upstream_col=upstream_column.column, + downstream_col=column_lineage_info.downstream.column, + ) + for column_lineage_info in sql_result.column_lineage + for upstream_column in column_lineage_info.upstreams + # Only include the CLL if the table in in the upstream list. + if target_platform_urn_to_dbt_name.get(upstream_column.table) + and target_platform_urn_to_dbt_name[upstream_column.table] + in node.upstream_nodes + ] # If we didn't fetch the schema from the graph, use the inferred schema. if not schema_fields and sql_result and sql_result.column_lineage: @@ -1361,6 +1415,11 @@ def _translate_dbt_name_to_upstream_urn(dbt_name: str) -> str: env=self.config.env, ) + if node.cll_debug_info and node.cll_debug_info.error: + self.report.report_warning( + node.dbt_name, + f"Error parsing column lineage: {node.cll_debug_info.error}", + ) cll = [ FineGrainedLineage( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, @@ -1377,6 +1436,9 @@ def _translate_dbt_name_to_upstream_urn(dbt_name: str) -> str: downstreams=[ mce_builder.make_schema_field_urn(node_urn, downstream) ], + confidenceScore=node.cll_debug_info.confidence + if node.cll_debug_info + else None, ) for downstream, upstreams in itertools.groupby( node.upstream_cll, lambda x: x.downstream_col diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 526d90b2a1bfab..402bb041c355a6 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -240,6 +240,16 @@ class SqlParsingResult(_ParserBaseModel): exclude=True, ) + @classmethod + def make_from_error(cls, error: Exception) -> "SqlParsingResult": + return cls( + in_tables=[], + out_tables=[], + debug_info=SqlParsingDebugInfo( + table_error=error, + ), + ) + def _parse_statement(sql: sqlglot.exp.ExpOrStr, dialect: str) -> sqlglot.Expression: statement: sqlglot.Expression = sqlglot.maybe_parse( @@ -1102,14 +1112,47 @@ def sqlglot_lineage( default_schema=default_schema, ) except Exception as e: - return SqlParsingResult( - in_tables=[], - out_tables=[], - column_lineage=None, - debug_info=SqlParsingDebugInfo( - table_error=e, - ), - ) + return SqlParsingResult.make_from_error(e) + + +def detach_ctes( + sql: sqlglot.exp.ExpOrStr, platform: str, cte_mapping: Dict[str, str] +) -> sqlglot.exp.Expression: + """Replace CTE references with table references. + + For example, with cte_mapping = {"__cte_0": "_my_cte_table"}, the following SQL + + WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN __cte_0 ON table2.id = __cte_0.id + + is transformed into + + WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN _my_cte_table ON table2.id = _my_cte_table.id + + Note that the original __cte_0 definition remains in the query, but is simply not referenced. + The query optimizer should be able to remove it. + + This method makes a major assumption: that no other table/column has the same name as a + key in the cte_mapping. + """ + + dialect = _get_dialect(platform) + statement = _parse_statement(sql, dialect=dialect) + + def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: + if ( + isinstance(node, sqlglot.exp.Identifier) + and not (node.parent and isinstance(node.parent.parent, sqlglot.exp.CTE)) + and node.name in cte_mapping + ): + # We expect node.parent to be a Table, Column, TableAlias, etc. + # There's quite a few possibilities, so this approach is easier except + # that it assumes that the cte names are unique. + new_node = sqlglot.exp.Identifier(this=cte_mapping[node.name]) + return new_node + else: + return node + + return statement.transform(replace_cte_refs, copy=False) def create_lineage_sql_parsed_result( @@ -1145,14 +1188,7 @@ def create_lineage_sql_parsed_result( default_schema=schema, ) except Exception as e: - return SqlParsingResult( - in_tables=[], - out_tables=[], - column_lineage=None, - debug_info=SqlParsingDebugInfo( - table_error=e, - ), - ) + return SqlParsingResult.make_from_error(e) finally: if needs_close: schema_resolver.close() diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index dfc5b486abd35f..030d139d78e2ac 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -3,11 +3,47 @@ import pytest from datahub.testing.check_sql_parser_result import assert_sql_result -from datahub.utilities.sqlglot_lineage import _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT +from datahub.utilities.sqlglot_lineage import ( + _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT, + detach_ctes, +) RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens" +def test_detach_ctes_simple(): + original = "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN __cte_0 ON table2.id = __cte_0.id" + detached_expr = detach_ctes( + original, + platform="snowflake", + cte_mapping={"__cte_0": "_my_cte_table"}, + ) + detached = detached_expr.sql(dialect="snowflake") + + assert ( + detached + == "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN _my_cte_table ON table2.id = _my_cte_table.id" + ) + + +def test_detach_ctes_with_alias(): + original = "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN __cte_0 AS tablealias ON table2.id = tablealias.id" + detached_expr = detach_ctes( + original, + platform="snowflake", + cte_mapping={"__cte_0": "_my_cte_table"}, + ) + detached = detached_expr.sql(dialect="snowflake") + + assert ( + detached + == "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN _my_cte_table AS tablealias ON table2.id = tablealias.id" + ) + + +# TODO test if detaching ctes works with BigQuery + + def test_select_max(): # The COL2 should get normalized to col2. assert_sql_result( From 741a77151d5083d92aef5eaf976eedde3717ef75 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Oct 2023 23:50:53 -0700 Subject: [PATCH 09/21] more fixes --- .../ingestion/source/dbt/dbt_common.py | 57 ++++++++++--------- .../src/datahub/utilities/sqlglot_lineage.py | 23 ++++++-- .../unit/sql_parsing/test_sqlglot_lineage.py | 17 ++++++ 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 868b13f06d73d0..0789a34bfb8a3f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -421,11 +421,14 @@ class DBTNode: test_info: Optional["DBTTest"] = None # only populated if node_type == 'test' test_result: Optional["DBTTestResult"] = None + @staticmethod + def _join_parts(parts: List[Optional[str]]) -> str: + assert parts + return ".".join([part for part in parts if part]) + def get_db_fqn(self) -> str: - if self.database: - fqn = f"{self.database}.{self.schema}.{self.name}" - else: - fqn = f"{self.schema}.{self.name}" + # Database might be None, but schema and name should always be present. + fqn = self._join_parts([self.database, self.schema, self.name]) return fqn.replace('"', "") def get_urn( @@ -446,21 +449,17 @@ def get_urn( env=env, ) - def get_urn_fake_ephemeral( - self, - target_platform: str, - env: str, - target_platform_instance: Optional[str], - ) -> str: - assert self.materialization == "ephemeral" - db_fqn = f"__datahub__dbt__ephemeral__{self.name}" + def is_ephemeral_model(self) -> bool: + return self.materialization == "ephemeral" - return mce_builder.make_dataset_urn_with_platform_instance( - platform=target_platform, - name=db_fqn, - platform_instance=target_platform_instance, - env=env, + def get_fake_ephemeral_table_name(self) -> str: + assert self.is_ephemeral_model() + + # Similar to get_db_fqn. + fqn = self._join_parts( + [self.database, self.schema, f"__datahub__dbt__ephemeral__{self.name}"] ) + return fqn.replace('"', "") def get_urn_for_upstream_lineage( self, @@ -495,7 +494,7 @@ def get_urn_for_upstream_lineage( @property def exists_in_target_platform(self): - return not (self.materialization == "ephemeral" or self.node_type == "test") + return not (self.is_ephemeral_model() or self.node_type == "test") def merge_schema_fields(self, schema_fields: List[SchemaField]) -> None: """ @@ -845,12 +844,13 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No self.config.target_platform_instance, ) should_fetch_target_node_schema = True - elif node.node_type == "ephemeral": + elif node.is_ephemeral_model(): # For ephemeral nodes, we "pretend" that they exist in the target platform # for schema resolution purposes. - target_node_urn = node.get_urn_fake_ephemeral( - target_platform=self.config.target_platform, - target_platform_instance=self.config.target_platform_instance, + target_node_urn = mce_builder.make_dataset_urn_with_platform_instance( + platform=self.config.target_platform, + name=node.get_fake_ephemeral_table_name(), + platform_instance=self.config.target_platform_instance, env=self.config.env, ) if target_node_urn: @@ -872,17 +872,13 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No node.compiled_code, platform=schema_resolver.platform, cte_mapping={ - f"__dbt__cte__{upstream_node.name}": upstream_node.get_urn_fake_ephemeral( - target_platform=self.config.target_platform, - target_platform_instance=self.config.target_platform_instance, - env=self.config.env, - ) + f"__dbt__cte__{upstream_node.name}": upstream_node.get_fake_ephemeral_table_name() for upstream_node in [ all_nodes_map[upstream_node_name] for upstream_node_name in node.upstream_nodes if upstream_node_name in all_nodes_map ] - if upstream_node.materialization == "ephemeral" + if upstream_node.is_ephemeral_model() }, ) except Exception as e: @@ -914,6 +910,11 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No and target_platform_urn_to_dbt_name[upstream_column.table] in node.upstream_nodes ] + if ( + "monthly_billing_with_cust" in node.dbt_name + and node.node_type == "model" + ): + breakpoint() # If we didn't fetch the schema from the graph, use the inferred schema. if not schema_fields and sql_result and sql_result.column_lineage: diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 402bb041c355a6..b0dfd2f1031258 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -1141,13 +1141,26 @@ def detach_ctes( def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: if ( isinstance(node, sqlglot.exp.Identifier) - and not (node.parent and isinstance(node.parent.parent, sqlglot.exp.CTE)) + and node.parent + and not isinstance(node.parent.parent, sqlglot.exp.CTE) and node.name in cte_mapping ): - # We expect node.parent to be a Table, Column, TableAlias, etc. - # There's quite a few possibilities, so this approach is easier except - # that it assumes that the cte names are unique. - new_node = sqlglot.exp.Identifier(this=cte_mapping[node.name]) + full_new_name = cte_mapping[node.name] + table_expr = sqlglot.maybe_parse( + full_new_name, dialect=dialect, into=sqlglot.exp.Table + ) + + # We expect node.parent to be a Table or Column. + # Either way, it should support catalog/db/name. + parent = node.parent + + if "catalog" in parent.arg_types: + parent.set("catalog", table_expr.catalog) + if "db" in parent.arg_types: + parent.set("db", table_expr.db) + + new_node = sqlglot.exp.Identifier(this=table_expr.name) + return new_node else: return node diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 030d139d78e2ac..480069a9828a30 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -41,6 +41,23 @@ def test_detach_ctes_with_alias(): ) +def test_detach_ctes_with_multipart_replacement(): + original = "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN __cte_0 ON table2.id = __cte_0.id" + detached_expr = detach_ctes( + original, + platform="snowflake", + cte_mapping={"__cte_0": "my_db.my_schema.my_table"}, + ) + detached = detached_expr.sql(dialect="snowflake") + + assert ( + detached + == "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_table.id" + ) + + +# TODO test lineage with a statement like this: +# SELECT * FROM table2 JOIN my_db.my_schema.my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_db.my_schema.my_table.id # TODO test if detaching ctes works with BigQuery From c3a0d42ed7448e57b7048cd0d0d6fe0585a91795 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 26 Oct 2023 10:50:46 -0700 Subject: [PATCH 10/21] more tests --- .../ingestion/source/dbt/dbt_common.py | 35 ++++++++++++++--- .../goldens/test_snowflake_unused_cte.json | 39 +++++++++++++++++++ .../unit/sql_parsing/test_sqlglot_lineage.py | 31 ++++++++++++--- 3 files changed, 94 insertions(+), 11 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 0789a34bfb8a3f..76c936a055e7be 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -544,6 +544,31 @@ def get_custom_properties(node: DBTNode) -> Dict[str, str]: return custom_properties +def _get_dbt_cte_names(name: str, target_platform: str) -> List[str]: + # Match the dbt CTE naming scheme: + # The default is defined here https://github.com/dbt-labs/dbt-core/blob/4122f6c308c88be4a24c1ea490802239a4c1abb8/core/dbt/adapters/base/relation.py#L222 + # However, since this PR https://github.com/dbt-labs/dbt-core/pull/2712, it's also possible + # for adapters to override this default. Only a handful actually do though: + # https://github.com/search?type=code&q=add_ephemeral_prefix+path:/%5Edbt%5C/adapters%5C// + + # Regardless, we need to keep the original name to work with older dbt versions. + default_cte_name = f"__dbt__cte__{name}" + + adapter_cte_names = { + "hive": f"tmp__dbt__cte__{name}", + "oracle": f"dbt__cte__{name}__", + "netezza": f"dbt__cte__{name}", + "exasol": f"dbt__CTE__{name}", + "db2": f"DBT_CTE__{name}", # ibm db2 + } + + cte_names = [default_cte_name] + if target_platform in adapter_cte_names: + cte_names.append(adapter_cte_names[target_platform]) + + return cte_names + + def get_upstreams( upstreams: List[str], all_nodes: Dict[str, DBTNode], @@ -872,13 +897,16 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No node.compiled_code, platform=schema_resolver.platform, cte_mapping={ - f"__dbt__cte__{upstream_node.name}": upstream_node.get_fake_ephemeral_table_name() + cte_name: upstream_node.get_fake_ephemeral_table_name() for upstream_node in [ all_nodes_map[upstream_node_name] for upstream_node_name in node.upstream_nodes if upstream_node_name in all_nodes_map ] if upstream_node.is_ephemeral_model() + for cte_name in _get_dbt_cte_names( + upstream_node.name, schema_resolver.platform + ) }, ) except Exception as e: @@ -910,11 +938,6 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No and target_platform_urn_to_dbt_name[upstream_column.table] in node.upstream_nodes ] - if ( - "monthly_billing_with_cust" in node.dbt_name - and node.node_type == "model" - ): - breakpoint() # If we didn't fetch the schema from the graph, use the inferred schema. if not schema_fields and sql_result and sql_result.column_lineage: diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json new file mode 100644 index 00000000000000..3916c6dc7c5ef7 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unused_cte.json @@ -0,0 +1,39 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,table2,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,table3,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "col1", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,table1,PROD)", + "column": "col1" + } + ] + }, + { + "downstream": { + "table": null, + "column": "col6", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,table3,PROD)", + "column": "col6" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 480069a9828a30..e2939a96bb2d3f 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -56,11 +56,6 @@ def test_detach_ctes_with_multipart_replacement(): ) -# TODO test lineage with a statement like this: -# SELECT * FROM table2 JOIN my_db.my_schema.my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_db.my_schema.my_table.id -# TODO test if detaching ctes works with BigQuery - - def test_select_max(): # The COL2 should get normalized to col2. assert_sql_result( @@ -683,6 +678,32 @@ def test_snowflake_column_cast(): ) +def test_snowflake_unused_cte(): + # For this, we expect table level lineage to include table1, but CLL should not. + assert_sql_result( + """ +WITH cte1 AS ( + SELECT col1, col2 + FROM table1 + WHERE col1 = 'value1' +), cte2 AS ( + SELECT col3, col4 + FROM table2 + WHERE col2 = 'value2' +) +SELECT cte1.col1, table3.col6 +FROM cte1 +JOIN table3 ON table3.col5 = cte1.col2 +""", + dialect="snowflake", + expected_file=RESOURCE_DIR / "test_snowflake_unused_cte.json", + ) + + +# TODO test lineage with a statement like this: +# SELECT * FROM table2 JOIN my_db.my_schema.my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_db.my_schema.my_table.id + + # TODO: Add a test for setting platform_instance or env From a3bf5d7ca2235c695aadcff2a3e61fb395ccb430 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 26 Oct 2023 10:50:54 -0700 Subject: [PATCH 11/21] even more tests --- .../test_snowflake_cte_name_collision.json | 47 ++++++++++++++++ ...owflake_full_table_name_col_reference.json | 55 ++++++++++++++++++ .../unit/sql_parsing/test_sqlglot_lineage.py | 56 ++++++++++++++++++- 3 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json new file mode 100644 index 00000000000000..44f1075c058ad6 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_cte_name_collision.json @@ -0,0 +1,47 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table3,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR(16777216)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)", + "column": "col2" + } + ] + }, + { + "downstream": { + "table": null, + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR(16777216)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table3,PROD)", + "column": "col1" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json new file mode 100644 index 00000000000000..f8301f1e8189ee --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_full_table_name_col_reference.json @@ -0,0 +1,55 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "id", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "id" + } + ] + }, + { + "downstream": { + "table": null, + "column": "id_gt_100", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "id" + } + ] + }, + { + "downstream": { + "table": null, + "column": "struct_field1", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "struct_field.field1" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index e2939a96bb2d3f..e70aac97182170 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -700,8 +700,60 @@ def test_snowflake_unused_cte(): ) -# TODO test lineage with a statement like this: -# SELECT * FROM table2 JOIN my_db.my_schema.my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_db.my_schema.my_table.id +def test_snowflake_cte_name_collision(): + # In this example, output col1 should come from table3 and not table1, since the cte is unused. + # We'll still generate table-level lineage that includes table1. + assert_sql_result( + """ +WITH cte_alias AS ( + SELECT col1, col2 + FROM table1 +) +SELECT table2.col2, cte_alias.col1 +FROM table2 +JOIN table3 AS cte_alias ON cte_alias.col2 = cte_alias.col2 +""", + dialect="snowflake", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)": { + "col1": "NUMBER(38,0)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)": { + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table3,PROD)": { + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_cte_name_collision.json", + ) + + +def test_snowflake_full_table_name_col_reference(): + assert_sql_result( + """ +SELECT + my_db.my_schema.my_table.id, + case when my_db.my_schema.my_table.id > 100 then 1 else 0 end as id_gt_100, + my_db.my_schema.my_table.struct_field.field1 as struct_field1, +FROM my_db.my_schema.my_table +""", + dialect="snowflake", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_db.my_schema.my_table,PROD)": { + "id": "NUMBER(38,0)", + "struct_field": "struct", + }, + }, + expected_file=RESOURCE_DIR + / "test_snowflake_full_table_name_col_reference.json", + ) # TODO: Add a test for setting platform_instance or env From 1c439809deb2dfca2d9fa888c2205d48966d32b2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 7 Nov 2023 20:01:01 -0800 Subject: [PATCH 12/21] fix validator --- .../ingestion/source/dbt/dbt_common.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index dd0f0048f2a383..d4f55fa8d0670f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -298,7 +298,7 @@ class DBTCommonConfig( ) include_column_lineage: bool = Field( default=False, - description="When enabled, column-level lineage will be extracted from the dbt node definition.", + description="When enabled, column-level lineage will be extracted from the dbt node definition. Requires `infer_dbt_schemas` and a datahub API instance.", ) # override default value to True. incremental_lineage: bool = Field( @@ -360,6 +360,16 @@ def meta_mapping_validator( ) return meta_mapping + @validator("include_column_lineage") + def validate_include_column_lineage( + cls, include_column_lineage: bool, values: Dict + ) -> bool: + if include_column_lineage and not values.get("infer_dbt_schemas"): + raise ValueError( + "`infer_dbt_schemas` must be enabled to use `include_column_lineage`" + ) + return include_column_lineage + @dataclass class DBTColumn: @@ -501,7 +511,7 @@ def get_urn_for_upstream_lineage( def exists_in_target_platform(self): return not (self.is_ephemeral_model() or self.node_type == "test") - def merge_schema_fields(self, schema_fields: List[SchemaField]) -> None: + def merge_inferred_schema_fields(self, schema_fields: List[SchemaField]) -> None: """ Merges the schema fields into the DBTNode. @@ -959,7 +969,18 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No # Merge the schema fields into the dbt node. if schema_fields: - node.merge_schema_fields(schema_fields) + node.merge_inferred_schema_fields(schema_fields) + elif node.columns: + # If we don't have an inferred schema, fallback to the user-defined schema. + schema_fields = [ + SchemaField( + fieldPath=column.name, + type=column.datahub_data_type + or SchemaFieldDataType(type=NullTypeClass()), + nativeDataType=column.data_type or "", + ) + for column in node.columns + ] # Add the node to the schema resolver, so that CLL works for # downstream nodes. @@ -967,8 +988,8 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No schema_resolver.add_raw_schema_info( target_node_urn, { - column.name: column.data_type or "unknown" - for column in node.columns + column.fieldPath: column.nativeDataType + for column in schema_fields }, ) From 89c3334054a53628a8a9d25bfb492e706ad36701 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 7 Nov 2023 23:53:34 -0800 Subject: [PATCH 13/21] fix column casing bug --- .../ingestion/source/dbt/dbt_common.py | 78 ++++++++++++------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index d4f55fa8d0670f..e860ce53e7a921 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -107,6 +107,7 @@ from datahub.specific.dataset import DatasetPatchBuilder from datahub.utilities.mapping import Constants, OperationProcessor from datahub.utilities.sqlglot_lineage import ( + SchemaInfo, SchemaResolver, SqlParsingDebugInfo, SqlParsingResult, @@ -511,7 +512,7 @@ def get_urn_for_upstream_lineage( def exists_in_target_platform(self): return not (self.is_ephemeral_model() or self.node_type == "test") - def merge_inferred_schema_fields(self, schema_fields: List[SchemaField]) -> None: + def merge_schema_fields(self, schema_fields: List[SchemaField]) -> None: """ Merges the schema fields into the DBTNode. @@ -844,6 +845,10 @@ def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: return nodes + @staticmethod + def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo: + return {column.fieldPath: column.nativeDataType for column in schema_fields} + def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> None: if not self.config.infer_dbt_schemas: if self.config.include_column_lineage: @@ -896,13 +901,50 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No if target_node_urn: target_platform_urn_to_dbt_name[target_node_urn] = node.dbt_name - # Fetch the schema from the graph if possible. + # Our schema resolver preference is: + # 1. dbt catalog + # 2. graph + # 3. inferred + # Exception: if convert_column_urns_to_lowercase is enabled, swap 1 and 2. + # Cases 1 and 2 are handled here, and case 3 is handled after schema inference has occurred. schema_fields: Optional[List[SchemaField]] = None - if target_node_urn and should_fetch_target_node_schema: + + # Fetch the schema from the graph. + if ( + not schema_fields + and target_node_urn + and should_fetch_target_node_schema + ): schema_metadata = graph.get_aspect(target_node_urn, SchemaMetadata) if schema_metadata: schema_fields = schema_metadata.fields + # Otherwise, load the schema from the dbt catalog. + # Note that this might get the casing wrong relative to DataHub, but + # has a more up-to-date column list. + if node.columns and ( + not schema_fields or self.config.convert_column_urns_to_lowercase + ): + schema_fields = [ + SchemaField( + fieldPath=column.name.lower() + if self.config.convert_column_urns_to_lowercase + else column.name, + type=column.datahub_data_type or NullTypeClass(), + nativeDataType=column.data_type, + ) + for column in node.columns + ] + + # Add the node to the schema resolver, so that we can get column + # casing to match the upstream platform. + added_to_schema_resolver = False + if schema_fields: + schema_resolver.add_raw_schema_info( + target_node_urn, self._to_schema_info(schema_fields) + ) + added_to_schema_resolver = True + # Run sql parser to infer the schema + generate column lineage. sql_result = None if node.compiled_code: @@ -967,32 +1009,16 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No for column_lineage in sql_result.column_lineage ] - # Merge the schema fields into the dbt node. - if schema_fields: - node.merge_inferred_schema_fields(schema_fields) - elif node.columns: - # If we don't have an inferred schema, fallback to the user-defined schema. - schema_fields = [ - SchemaField( - fieldPath=column.name, - type=column.datahub_data_type - or SchemaFieldDataType(type=NullTypeClass()), - nativeDataType=column.data_type or "", - ) - for column in node.columns - ] - - # Add the node to the schema resolver, so that CLL works for - # downstream nodes. - if target_node_urn and node.columns: + # Conditionally add the inferred schema to the schema resolver. + if not added_to_schema_resolver and schema_fields: schema_resolver.add_raw_schema_info( - target_node_urn, - { - column.fieldPath: column.nativeDataType - for column in schema_fields - }, + target_node_urn, self._to_schema_info(schema_fields) ) + # Merge the schema fields into the dbt node. + if schema_fields: + node.merge_schema_fields(schema_fields) + def create_platform_mces( self, dbt_nodes: List[DBTNode], From 618eb2ff3113a6260d234b6e9c3e8f3892e08ac5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 8 Nov 2023 11:32:41 -0800 Subject: [PATCH 14/21] make graph instance not required --- .../ingestion/source/dbt/dbt_common.py | 20 ++++++++++++------- .../datahub/ingestion/source/dbt/dbt_core.py | 3 ++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index e860ce53e7a921..31136b252efda4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -299,7 +299,8 @@ class DBTCommonConfig( ) include_column_lineage: bool = Field( default=False, - description="When enabled, column-level lineage will be extracted from the dbt node definition. Requires `infer_dbt_schemas` and a datahub API instance.", + description="When enabled, column-level lineage will be extracted from the dbt node definition. Requires `infer_dbt_schemas` to be enabled. " + "If you run into issues where the column name casing does not match up with properly, providing a datahub_api or using the rest sink will improve accuracy.", ) # override default value to True. incremental_lineage: bool = Field( @@ -497,7 +498,13 @@ def get_urn_for_upstream_lineage( platform_instance_value = dbt_platform_instance materialized = self.materialization - if materialized in {"view", "table", "incremental", "snapshot"}: + if materialized in { + "view", + "materialized_view", + "table", + "incremental", + "snapshot", + }: # upstream urns point to the target platform platform_value = target_platform platform_instance_value = target_platform_instance @@ -788,8 +795,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if self.config.write_semantics == "PATCH": self.ctx.require_graph("Using dbt with write_semantics=PATCH") - if self.config.infer_dbt_schemas: - self.ctx.require_graph("Using dbt with infer_dbt_schemas=True") all_nodes, additional_custom_props = self.load_nodes() @@ -857,7 +862,7 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No ) return - graph = self.ctx.require_graph("Using dbt with infer_dbt_schemas") + graph = self.ctx.graph schema_resolver = SchemaResolver( platform=self.config.target_platform, @@ -902,8 +907,8 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No target_platform_urn_to_dbt_name[target_node_urn] = node.dbt_name # Our schema resolver preference is: - # 1. dbt catalog - # 2. graph + # 1. graph + # 2. dbt catalog # 3. inferred # Exception: if convert_column_urns_to_lowercase is enabled, swap 1 and 2. # Cases 1 and 2 are handled here, and case 3 is handled after schema inference has occurred. @@ -914,6 +919,7 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No not schema_fields and target_node_urn and should_fetch_target_node_schema + and graph ): schema_metadata = graph.get_aspect(target_node_urn, SchemaMetadata) if schema_metadata: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index dc3a84847beb24..a7703b203bceee 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -171,7 +171,8 @@ def extract_dbt_entities( catalog_type = None if catalog_node is None: - if materialization != "test": + if materialization not in {"test", "ephemeral"}: + # Test and ephemeral nodes will never show up in the catalog. report.report_warning( key, f"Entity {key} ({name}) is in manifest but missing from catalog", From f3f87c76d43448db1239378c9f2bb4370160aec3 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 8 Nov 2023 12:45:05 -0800 Subject: [PATCH 15/21] fix mypy --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 31136b252efda4..8ac7578e195050 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -936,7 +936,9 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No fieldPath=column.name.lower() if self.config.convert_column_urns_to_lowercase else column.name, - type=column.datahub_data_type or NullTypeClass(), + type=SchemaFieldDataType( + type=column.datahub_data_type or NullTypeClass() + ), nativeDataType=column.data_type, ) for column in node.columns @@ -945,7 +947,7 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No # Add the node to the schema resolver, so that we can get column # casing to match the upstream platform. added_to_schema_resolver = False - if schema_fields: + if target_node_urn and schema_fields: schema_resolver.add_raw_schema_info( target_node_urn, self._to_schema_info(schema_fields) ) @@ -1016,7 +1018,7 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No ] # Conditionally add the inferred schema to the schema resolver. - if not added_to_schema_resolver and schema_fields: + if not added_to_schema_resolver and target_node_urn and schema_fields: schema_resolver.add_raw_schema_info( target_node_urn, self._to_schema_info(schema_fields) ) From a5683c92bdd1182ce1a5388cea3be37816963672 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 8 Nov 2023 12:50:38 -0800 Subject: [PATCH 16/21] make infer dbt schemas default true --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 7 +++---- metadata-ingestion/tests/integration/dbt/test_dbt.py | 4 ---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 8ac7578e195050..f6d3dca2b11c2c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -294,7 +294,7 @@ class DBTCommonConfig( description="When enabled, dbt test warnings will be treated as failures.", ) infer_dbt_schemas: bool = Field( - default=False, + default=True, description="When enabled, schemas will be inferred from the dbt node definition.", ) include_column_lineage: bool = Field( @@ -936,9 +936,8 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No fieldPath=column.name.lower() if self.config.convert_column_urns_to_lowercase else column.name, - type=SchemaFieldDataType( - type=column.datahub_data_type or NullTypeClass() - ), + type=column.datahub_data_type + or SchemaFieldDataType(type=NullTypeClass()), nativeDataType=column.data_type, ) for column in node.columns diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index abf5959b1f4ec7..95b5374bbb41df 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -57,7 +57,6 @@ def set_paths( "target_platform": self.target_platform, "enable_meta_mapping": False, "write_semantics": "OVERRIDE", - "infer_dbt_schemas": False, "meta_mapping": { "owner": { "match": "^@(.*)", @@ -260,7 +259,6 @@ def test_dbt_tests(pytestconfig, tmp_path, mock_time, **kwargs): ), # this is just here to avoid needing to access datahub server write_semantics="OVERRIDE", - infer_dbt_schemas=False, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -368,7 +366,6 @@ def test_dbt_tests_only_assertions(pytestconfig, tmp_path, mock_time, **kwargs): ), # this is just here to avoid needing to access datahub server write_semantics="OVERRIDE", - infer_dbt_schemas=False, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -450,7 +447,6 @@ def test_dbt_only_test_definitions_and_results( ), # this is just here to avoid needing to access datahub server write_semantics="OVERRIDE", - infer_dbt_schemas=False, ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), From d4b574aed1f476499c0a39279edee097dcdd6031 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 8 Nov 2023 12:53:13 -0800 Subject: [PATCH 17/21] make dbt files canonical --- .../dbt_enabled_with_schemas_mces_golden.json | 168 ++-- .../dbt_test_column_meta_mapping_golden.json | 184 +++-- .../dbt/dbt_test_events_golden.json | 731 ++++++++++++------ ...th_complex_owner_patterns_mces_golden.json | 153 +++- ...th_data_platform_instance_mces_golden.json | 159 ++-- ...h_non_incremental_lineage_mces_golden.json | 111 ++- ..._target_platform_instance_mces_golden.json | 159 ++-- 7 files changed, 1150 insertions(+), 515 deletions(-) diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index 16df7b8e51b24f..23ce6bc4505e56 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -14,7 +14,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -176,7 +177,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -195,7 +197,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -355,7 +358,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -373,7 +377,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +580,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -594,7 +600,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -712,7 +719,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -730,7 +738,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -882,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -900,7 +910,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1070,7 +1081,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1088,7 +1100,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1198,7 +1211,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1216,7 +1230,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1338,7 +1353,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1356,7 +1372,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1486,7 +1503,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1504,7 +1522,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1698,7 +1717,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1716,7 +1736,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1862,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -1880,7 +1902,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2047,7 +2070,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2065,7 +2089,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2211,7 +2236,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2229,7 +2255,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2375,7 +2402,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2393,7 +2421,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2539,7 +2568,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2557,7 +2587,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2703,7 +2734,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2712,12 +2744,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2726,12 +2771,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2740,12 +2798,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2760,7 +2831,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2775,7 +2847,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2790,7 +2863,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2805,7 +2879,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } }, { @@ -2820,7 +2895,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-schemas-dbt-enabled" + "runId": "dbt-test-with-schemas-dbt-enabled", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 4557cb03248291..50a4afb551250b 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -14,7 +14,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -118,7 +119,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -137,7 +139,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -275,7 +278,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -293,7 +297,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +492,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -506,7 +512,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -652,7 +659,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -670,7 +678,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -920,7 +929,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -938,7 +948,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1084,7 +1095,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1102,7 +1114,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1272,7 +1285,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1290,7 +1304,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1400,7 +1415,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1434,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1540,7 +1557,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1558,7 +1576,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1688,7 +1707,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1706,7 +1726,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1900,7 +1921,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -1918,7 +1940,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2064,7 +2087,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2082,7 +2106,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2249,7 +2274,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2267,7 +2293,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2413,7 +2440,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2431,7 +2459,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2577,7 +2606,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2595,7 +2625,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2741,7 +2772,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2759,7 +2791,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2905,7 +2938,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2914,12 +2948,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.an-aliased-view-for-monthly-billing%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.an-aliased-view-for-monthly-billing%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2928,12 +2975,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.an_aliased_view_for_payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.an_aliased_view_for_payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2942,12 +3002,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.payments_by_customer_by_month%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.payments_by_customer_by_month%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2956,12 +3029,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.customer_snapshot%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.public.customer_snapshot%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2976,7 +3062,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } }, { @@ -2991,7 +3078,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-column-meta-mapping" + "runId": "dbt-column-meta-mapping", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json index 086c5a78e92a45..3e8ddf317f387e 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json @@ -15,7 +15,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -203,7 +204,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -222,7 +224,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -429,7 +432,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -447,7 +451,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -565,7 +570,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -583,7 +589,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -713,7 +720,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -731,7 +739,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -861,7 +870,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -878,7 +888,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -975,7 +986,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -992,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1101,7 +1114,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1118,7 +1132,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1227,7 +1242,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1236,12 +1252,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.customers%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.customers,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.customers%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.customers,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1250,12 +1279,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.orders%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.orders,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.orders%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.orders,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1264,12 +1306,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.stg_customers%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.stg_customers,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.stg_customers%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.stg_customers,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1278,12 +1333,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.stg_orders%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.stg_orders,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.stg_orders%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.stg_orders,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1292,12 +1360,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.stg_payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.stg_payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.stg_payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.stg_payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1306,12 +1387,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.raw_customers%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.raw_customers,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.raw_customers%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.raw_customers,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1320,12 +1414,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.raw_orders%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.raw_orders,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.raw_orders%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.raw_orders,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1334,12 +1441,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.raw_payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.raw_payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Ccalm-pagoda-323403.jaffle_shop.raw_payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,calm-pagoda-323403.jaffle_shop.raw_payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1354,7 +1474,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1397,7 +1518,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1408,23 +1530,24 @@ "aspect": { "json": { "timestampMillis": 1655565131058, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1439,7 +1562,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1482,7 +1606,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1493,23 +1618,24 @@ "aspect": { "json": { "timestampMillis": 1655565131075, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:da743330013b7e3e3707ac6e526ab408", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:da743330013b7e3e3707ac6e526ab408", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1524,7 +1650,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1567,7 +1694,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1578,23 +1706,24 @@ "aspect": { "json": { "timestampMillis": 1655565131073, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:2887b9c826e0be6296a37833bdc380bd", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_payments,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:2887b9c826e0be6296a37833bdc380bd", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1609,7 +1738,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1640,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1651,23 +1782,24 @@ "aspect": { "json": { "timestampMillis": 1655565131077, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1682,7 +1814,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1732,7 +1865,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1747,7 +1881,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1791,7 +1926,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1802,12 +1938,7 @@ "aspect": { "json": { "timestampMillis": 1655565137668, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:bf7fd2b46d2c32ee9bb036acd1559782", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)", "status": "COMPLETE", "result": { @@ -1815,12 +1946,18 @@ "nativeResults": { "message": "Database Error in test dbt_expectations_expect_column_values_to_be_in_set_customers_customer_id__customer_id_is_not_null__0__1__2 (models/schema.yml)\n No matching signature for operator = for argument types: INT64, STRING. Supported signature: ANY = ANY at [46:25]\n compiled SQL at target/run/jaffle_shop/models/schema.yml/dbt_expectations_expect_column_e42202dc29e1149de0f5c3966219796d.sql" } + }, + "assertionUrn": "urn:li:assertion:bf7fd2b46d2c32ee9bb036acd1559782", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1835,7 +1972,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1874,7 +2012,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1885,12 +2024,7 @@ "aspect": { "json": { "timestampMillis": 1655565137668, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:1c217b7587a0cad47a07a09bfe154055", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { @@ -1898,12 +2032,18 @@ "nativeResults": { "message": "Database Error in test dbt_expectations_expect_column_values_to_not_be_in_set_orders_credit_card_amount__credit_card_amount_is_not_null__0 (models/schema.yml)\n No matching signature for operator = for argument types: FLOAT64, STRING. Supported signature: ANY = ANY at [36:25]\n compiled SQL at target/run/jaffle_shop/models/schema.yml/dbt_expectations_expect_column_fdf581b1071168614662824120d65b90.sql" } + }, + "assertionUrn": "urn:li:assertion:1c217b7587a0cad47a07a09bfe154055", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1918,7 +2058,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1954,7 +2095,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1965,23 +2107,24 @@ "aspect": { "json": { "timestampMillis": 1655565132560, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1996,7 +2139,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2032,7 +2176,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2043,23 +2188,24 @@ "aspect": { "json": { "timestampMillis": 1655565133585, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:bbd78a070092f54313153abec49f6f31", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:bbd78a070092f54313153abec49f6f31", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2074,7 +2220,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2110,7 +2257,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2121,23 +2269,24 @@ "aspect": { "json": { "timestampMillis": 1655565133591, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2152,7 +2301,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,7 +2338,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2199,23 +2350,24 @@ "aspect": { "json": { "timestampMillis": 1655565133595, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:ca065a99637630468f688717590beeab", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:ca065a99637630468f688717590beeab", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2230,7 +2382,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2266,7 +2419,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2277,23 +2431,24 @@ "aspect": { "json": { "timestampMillis": 1655565134031, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2308,7 +2463,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2344,7 +2500,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2355,23 +2512,24 @@ "aspect": { "json": { "timestampMillis": 1655565134482, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:11087a3d7ae178df22c42922ac8ef8ad", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:11087a3d7ae178df22c42922ac8ef8ad", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2386,7 +2544,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2422,7 +2581,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2433,23 +2593,24 @@ "aspect": { "json": { "timestampMillis": 1655565134485, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:b301bb47cc4ebce4e78a194b3de11f25", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:b301bb47cc4ebce4e78a194b3de11f25", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2464,7 +2625,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2500,7 +2662,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2511,23 +2674,24 @@ "aspect": { "json": { "timestampMillis": 1655565134493, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:2e9117138dcc9facda66f1efd55a8cd7", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:2e9117138dcc9facda66f1efd55a8cd7", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2542,7 +2706,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2578,7 +2743,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2589,23 +2755,24 @@ "aspect": { "json": { "timestampMillis": 1655565134966, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:25ebf4faa9b1654ef54c46d975ca0a81", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_customers,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:25ebf4faa9b1654ef54c46d975ca0a81", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2620,7 +2787,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2656,7 +2824,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2667,23 +2836,24 @@ "aspect": { "json": { "timestampMillis": 1655565135368, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2698,7 +2868,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2734,7 +2905,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2745,23 +2917,24 @@ "aspect": { "json": { "timestampMillis": 1655565135377, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:c1eebc71f36690e4523adca30314e927", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_payments,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:c1eebc71f36690e4523adca30314e927", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2776,7 +2949,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2821,7 +2995,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2832,23 +3007,24 @@ "aspect": { "json": { "timestampMillis": 1655565135510, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2893,7 +3069,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2904,23 +3081,24 @@ "aspect": { "json": { "timestampMillis": 1655565135510, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2935,7 +3113,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2977,7 +3156,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2988,23 +3168,24 @@ "aspect": { "json": { "timestampMillis": 1655565135836, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:c51ca9c4b5a1f964bef748f0b8968e71", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:c51ca9c4b5a1f964bef748f0b8968e71", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3019,7 +3200,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3061,7 +3243,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3072,23 +3255,24 @@ "aspect": { "json": { "timestampMillis": 1655565136269, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:caa9b8060e214cecab88a92dc39c2e60", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:caa9b8060e214cecab88a92dc39c2e60", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3103,7 +3287,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3145,7 +3330,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3156,23 +3342,24 @@ "aspect": { "json": { "timestampMillis": 1655565136230, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_customers,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3187,7 +3374,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3229,7 +3417,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3240,23 +3429,24 @@ "aspect": { "json": { "timestampMillis": 1655565136395, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_orders,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3271,7 +3461,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3313,7 +3504,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3324,23 +3516,24 @@ "aspect": { "json": { "timestampMillis": 1655565136719, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, "runId": "c7a6b778-0e0f-4789-b567-ca7e124a6840", - "assertionUrn": "urn:li:assertion:fac27f352406b941125292413afa8096", "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.stg_payments,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:fac27f352406b941125292413afa8096", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" } } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3355,7 +3548,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3370,7 +3564,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3385,7 +3580,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3400,7 +3596,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3415,7 +3612,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3430,7 +3628,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3445,7 +3644,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3460,7 +3660,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3475,7 +3676,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3490,7 +3692,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3505,7 +3708,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3520,7 +3724,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3535,7 +3740,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3550,7 +3756,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3565,7 +3772,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3580,7 +3788,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3595,7 +3804,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3610,7 +3820,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3625,7 +3836,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3640,7 +3852,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3655,7 +3868,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3670,7 +3884,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3685,7 +3900,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3700,7 +3916,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-2022_02_03-07_00_00" + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index 19bfb60e62a08d..f08c5c30bdb165 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -14,7 +14,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -140,7 +141,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -159,7 +161,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -302,7 +305,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -320,7 +324,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -522,7 +527,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -541,7 +547,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -659,7 +666,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -677,7 +685,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -826,7 +835,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -844,7 +854,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1014,7 +1025,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1032,7 +1044,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1142,7 +1155,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1160,7 +1174,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1282,7 +1297,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1316,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1427,7 +1444,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1445,7 +1463,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1639,7 +1658,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1657,7 +1677,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1803,7 +1824,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1821,7 +1843,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -1985,7 +2008,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2003,7 +2027,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2149,7 +2174,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2167,7 +2193,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2313,7 +2340,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2331,7 +2359,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2477,7 +2506,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2486,12 +2516,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2500,12 +2543,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2514,12 +2570,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2534,7 +2603,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } }, { @@ -2549,7 +2619,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-complex-owner-patterns" + "runId": "dbt-test-with-complex-owner-patterns", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index 242c83003b1811..1f797c1adf394f 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -14,7 +14,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -141,7 +142,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -160,7 +162,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -303,7 +306,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -321,7 +325,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -523,7 +528,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -542,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -660,7 +667,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +686,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -827,7 +836,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -845,7 +855,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1015,7 +1026,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1033,7 +1045,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1143,7 +1156,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1161,7 +1175,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1283,7 +1298,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1301,7 +1317,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1428,7 +1445,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1446,7 +1464,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1640,7 +1659,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1658,7 +1678,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1804,7 +1825,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1822,7 +1844,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1986,7 +2009,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2004,7 +2028,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2150,7 +2175,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2168,7 +2194,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2314,7 +2341,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2332,7 +2360,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2478,7 +2507,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2526,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2642,7 +2673,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2651,12 +2683,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cdbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cdbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2665,12 +2710,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cdbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cdbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2679,12 +2737,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cdbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cdbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2699,7 +2770,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2714,7 +2786,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-data-platform-instance" + "runId": "dbt-test-with-data-platform-instance", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index d98b63b9da62fe..bcf11f16d042ad 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -14,7 +14,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -141,7 +142,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -160,7 +162,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -303,7 +306,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -321,7 +325,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -523,7 +528,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -542,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -660,7 +667,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +686,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -827,7 +836,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -845,7 +855,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1015,7 +1026,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1033,7 +1045,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1143,7 +1156,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1161,7 +1175,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1283,7 +1298,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1301,7 +1317,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1428,7 +1445,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1446,7 +1464,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1640,7 +1659,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1658,7 +1678,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1804,7 +1825,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1822,7 +1844,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -1986,7 +2009,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2004,7 +2028,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2150,7 +2175,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2168,7 +2194,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2314,7 +2341,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2332,7 +2360,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2478,7 +2507,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2526,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2642,7 +2673,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2669,7 +2701,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2696,7 +2729,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2723,7 +2757,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2738,7 +2773,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } }, { @@ -2753,7 +2789,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage" + "runId": "dbt-test-with-non-incremental-lineage", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index 7c024f93641b9b..6aa59168077ff9 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -14,7 +14,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -141,7 +142,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -160,7 +162,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -303,7 +306,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -321,7 +325,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -523,7 +528,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -542,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -660,7 +667,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +686,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -827,7 +836,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -845,7 +855,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1015,7 +1026,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1033,7 +1045,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1143,7 +1156,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1161,7 +1175,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1283,7 +1298,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1301,7 +1317,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1428,7 +1445,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1446,7 +1464,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1640,7 +1659,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1658,7 +1678,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1804,7 +1825,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1822,7 +1844,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -1986,7 +2009,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2004,7 +2028,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2150,7 +2175,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2168,7 +2194,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2314,7 +2341,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2332,7 +2360,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2478,7 +2507,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2526,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2642,7 +2673,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2651,12 +2683,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-monthly-billing%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2665,12 +2710,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.an-aliased-view-for-payments%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2679,12 +2737,25 @@ "changeType": "PATCH", "aspectName": "upstreamLineage", "aspect": { - "value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 1643871600000, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD)\", \"type\": \"TRANSFORMED\"}}]", - "contentType": "application/json-patch+json" + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adbt%2Cpagila.dbt_postgres.payments_by_customer_by_month%2CPROD%29", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + } + } + ] }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2699,7 +2770,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } }, { @@ -2714,7 +2786,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "dbt-test-with-target-platform-instance" + "runId": "dbt-test-with-target-platform-instance", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file From 5377b1230353dabb51e0cfdbbae1a9f3793b4dc0 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 8 Nov 2023 12:53:52 -0800 Subject: [PATCH 18/21] update test goldens --- .../dbt_enabled_with_schemas_mces_golden.json | 87 +++++++++++++++- .../dbt_test_column_meta_mapping_golden.json | 99 ++++++++++++++++++- ...th_complex_owner_patterns_mces_golden.json | 87 +++++++++++++++- ...th_data_platform_instance_mces_golden.json | 87 +++++++++++++++- ...h_non_incremental_lineage_mces_golden.json | 87 +++++++++++++++- ..._target_platform_instance_mces_golden.json | 87 +++++++++++++++- 6 files changed, 528 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index 23ce6bc4505e56..e4f01ef7a6c537 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -132,7 +132,92 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] } }, { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 50a4afb551250b..4d5b008b695f97 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -66,7 +66,104 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "initial_full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] } }, { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index f08c5c30bdb165..0bdd5e3c895c27 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -96,7 +96,92 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] } }, { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index 1f797c1adf394f..5ab0b11e377716 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -97,7 +97,92 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] } }, { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index bcf11f16d042ad..3725e590fee9e4 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -97,7 +97,92 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] } }, { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index 6aa59168077ff9..a47abab6b40f7a 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -97,7 +97,92 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] } }, { From a5c5e4ad8d74bf9b63b738d8d8ad053a0aea0615 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 13 Nov 2023 16:37:13 -0500 Subject: [PATCH 19/21] tweak incremental lineage impl --- .../api/incremental_lineage_helper.py | 16 ++-- .../ingestion/source/dbt/dbt_common.py | 91 +++++++++++-------- 2 files changed, 61 insertions(+), 46 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py index 945b201ca5758c..479486ce228998 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -15,7 +15,7 @@ from datahub.specific.dataset import DatasetPatchBuilder -def _convert_upstream_lineage_to_patch( +def convert_upstream_lineage_to_patch( urn: str, aspect: UpstreamLineageClass, system_metadata: Optional[SystemMetadataClass], @@ -86,16 +86,11 @@ def _merge_upstream_lineage( def _lineage_wu_via_read_modify_write( - graph: Optional[DataHubGraph], + graph: DataHubGraph, urn: str, aspect: UpstreamLineageClass, system_metadata: Optional[SystemMetadataClass], ) -> MetadataWorkUnit: - if graph is None: - raise ValueError( - "Failed to handle incremental lineage, DataHubGraph is missing. " - "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. " - ) gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) if gms_aspect: new_aspect = _merge_upstream_lineage(aspect, gms_aspect) @@ -131,11 +126,16 @@ def auto_incremental_lineage( yield wu if lineage_aspect.fineGrainedLineages: + if graph is None: + raise ValueError( + "Failed to handle incremental lineage, DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. " + ) yield _lineage_wu_via_read_modify_write( graph, urn, lineage_aspect, wu.metadata.systemMetadata ) elif lineage_aspect.upstreams: - yield _convert_upstream_lineage_to_patch( + yield convert_upstream_lineage_to_patch( urn, lineage_aspect, wu.metadata.systemMetadata ) else: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 63d48be1ef6637..94b10657142831 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -31,6 +31,9 @@ platform_name, support_status, ) +from datahub.ingestion.api.incremental_lineage_helper import ( + convert_upstream_lineage_to_patch, +) from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import DatasetSubTypes @@ -104,7 +107,6 @@ UpstreamLineageClass, ViewPropertiesClass, ) -from datahub.specific.dataset import DatasetPatchBuilder from datahub.utilities.mapping import Constants, OperationProcessor from datahub.utilities.sqlglot_lineage import ( SchemaInfo, @@ -305,7 +307,7 @@ class DBTCommonConfig( # override default value to True. incremental_lineage: bool = Field( default=True, - description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", + description="When enabled, emits incremental/patch lineage for non-dbt entities. When disabled, re-states lineage on each run.", ) @validator("target_platform") @@ -370,6 +372,13 @@ def validate_include_column_lineage( raise ValueError( "`infer_dbt_schemas` must be enabled to use `include_column_lineage`" ) + + if include_column_lineage and values.get("incremental_lineage"): + # TODO allow this if the graph instance is also available + raise ValueError( + "cannot enable both `include_column_lineage` and `incremental_lineage`" + ) + return include_column_lineage @@ -440,8 +449,9 @@ class DBTNode: @staticmethod def _join_parts(parts: List[Optional[str]]) -> str: - assert parts - return ".".join([part for part in parts if part]) + joined = ".".join([part for part in parts if part]) + assert joined + return joined def get_db_fqn(self) -> str: # Database might be None, but schema and name should always be present. @@ -519,11 +529,9 @@ def get_urn_for_upstream_lineage( def exists_in_target_platform(self): return not (self.is_ephemeral_model() or self.node_type == "test") - def merge_schema_fields(self, schema_fields: List[SchemaField]) -> None: + def columns_setdefault(self, schema_fields: List[SchemaField]) -> None: """ - Merges the schema fields into the DBTNode. - - This will prefer the dbt columns info over the schema metadata info. + Update the column list if they are not already set. """ if self.columns: @@ -855,6 +863,19 @@ def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo: return {column.fieldPath: column.nativeDataType for column in schema_fields} def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> None: + """Annotate the DBTNode objects with schema information and column-level lineage. + + Note that this mutates the DBTNode objects directly. + + This method does the following: + 1. Iterate over the dbt nodes in topological order. + 2. For each node, either load the schema from the graph or from the dbt catalog info. + We also add this schema to the schema resolver. + 3. Run sql parser to infer the schema + generate column lineage. + 4. Write the schema and column lineage back to the DBTNode object. + 5. If we haven't already added the node's schema to the schema resolver, do that. + """ + if not self.config.infer_dbt_schemas: if self.config.include_column_lineage: raise ConfigurationError( @@ -877,10 +898,9 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No for dbt_name in topological_sort( list(all_nodes_map.keys()), edges=list( - itertools.chain.from_iterable( - [(upstream, node.dbt_name) for upstream in node.upstream_nodes] - for node in all_nodes_map.values() - ) + (upstream, node.dbt_name) + for node in all_nodes_map.values() + for upstream in node.upstream_nodes ), ): node = all_nodes_map[dbt_name] @@ -915,12 +935,7 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No schema_fields: Optional[List[SchemaField]] = None # Fetch the schema from the graph. - if ( - not schema_fields - and target_node_urn - and should_fetch_target_node_schema - and graph - ): + if target_node_urn and should_fetch_target_node_schema and graph: schema_metadata = graph.get_aspect(target_node_urn, SchemaMetadata) if schema_metadata: schema_fields = schema_metadata.fields @@ -999,13 +1014,13 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No for upstream_column in column_lineage_info.upstreams # Only include the CLL if the table in in the upstream list. if target_platform_urn_to_dbt_name.get(upstream_column.table) - and target_platform_urn_to_dbt_name[upstream_column.table] in node.upstream_nodes ] # If we didn't fetch the schema from the graph, use the inferred schema. - if not schema_fields and sql_result and sql_result.column_lineage: - schema_fields = [ + inferred_schema_fields = None + if sql_result and sql_result.column_lineage: + inferred_schema_fields = [ SchemaField( fieldPath=column_lineage.downstream.column, type=column_lineage.downstream.column_type @@ -1017,14 +1032,18 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No ] # Conditionally add the inferred schema to the schema resolver. - if not added_to_schema_resolver and target_node_urn and schema_fields: + if ( + not added_to_schema_resolver + and target_node_urn + and inferred_schema_fields + ): schema_resolver.add_raw_schema_info( - target_node_urn, self._to_schema_info(schema_fields) + target_node_urn, self._to_schema_info(inferred_schema_fields) ) - # Merge the schema fields into the dbt node. - if schema_fields: - node.merge_schema_fields(schema_fields) + # Save the inferred schema fields into the dbt node. + if inferred_schema_fields: + node.columns_setdefault(inferred_schema_fields) def create_platform_mces( self, @@ -1118,19 +1137,15 @@ def create_platform_mces( self.config.platform_instance, ) upstreams_lineage_class = get_upstream_lineage([upstream_dbt_urn]) - if self.config.incremental_lineage: - patch_builder: DatasetPatchBuilder = DatasetPatchBuilder( - urn=node_datahub_urn + if not is_primary_source and self.config.incremental_lineage: + # We only generate incremental lineage for non-dbt nodes. + wu = convert_upstream_lineage_to_patch( + urn=node_datahub_urn, + aspect=upstreams_lineage_class, + system_metadata=None, ) - for upstream in upstreams_lineage_class.upstreams: - patch_builder.add_upstream_lineage(upstream) - - for mcp in patch_builder.build(): - yield MetadataWorkUnit( - id=f"upstreamLineage-for-{node_datahub_urn}", - mcp_raw=mcp, - is_primary_source=is_primary_source, - ) + wu.is_primary_source = is_primary_source + yield wu else: aspects.append(upstreams_lineage_class) From 184bc2f3a41a3443590e1c9c070a7c06a8c18dcf Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 13 Nov 2023 16:37:24 -0500 Subject: [PATCH 20/21] refactor --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 94b10657142831..baf6423f2be403 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1100,7 +1100,7 @@ def create_platform_mces( ) # mutates meta_aspects if mce_platform == DBT_PLATFORM: - aspects = self._generate_base_aspects( + aspects = self._generate_base_dbt_aspects( node, additional_custom_props_filtered, mce_platform, meta_aspects ) @@ -1252,7 +1252,7 @@ def _create_view_properties_aspect( ) return view_properties - def _generate_base_aspects( + def _generate_base_dbt_aspects( self, node: DBTNode, additional_custom_props_filtered: Dict[str, str], @@ -1260,8 +1260,7 @@ def _generate_base_aspects( meta_aspects: Dict[str, Any], ) -> List[Any]: """ - There are some common aspects that get generated for both dbt node and platform node depending on whether dbt - node creation is enabled or not. + Some common aspects that get generated for dbt nodes. """ # create an empty list of aspects and keep adding to it. Initializing with Any to avoid a From 18cb777da4390ca04a81146b59833bc73462ec3d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 13 Nov 2023 16:47:41 -0500 Subject: [PATCH 21/21] fix --- .../src/datahub/ingestion/source/dbt/dbt_common.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index baf6423f2be403..94df0a4f8a166e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -373,12 +373,6 @@ def validate_include_column_lineage( "`infer_dbt_schemas` must be enabled to use `include_column_lineage`" ) - if include_column_lineage and values.get("incremental_lineage"): - # TODO allow this if the graph instance is also available - raise ValueError( - "cannot enable both `include_column_lineage` and `incremental_lineage`" - ) - return include_column_lineage