From c3f9a9206d505d49e70b1795476c4928c87f2a42 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 22 Nov 2024 20:32:24 +0530 Subject: [PATCH] feat(ingest/mssql): include stored procedure lineage (#11912) Co-authored-by: Harshal Sheth --- .../ingestion/source/sql/mssql/job_models.py | 1 + .../ingestion/source/sql/mssql/source.py | 151 +- .../sql/mssql/stored_procedure_lineage.py | 84 + .../ingestion/source/sql/sql_common.py | 12 +- .../src/datahub/sql_parsing/datajob.py | 50 + .../src/datahub/sql_parsing/query_types.py | 11 +- .../datahub/sql_parsing/split_statements.py | 163 + .../sql_parsing/sql_parsing_aggregator.py | 1 - .../integration/sql_server/docker-compose.yml | 2 +- .../golden_mces_mssql_no_db_to_file.json | 396 ++- .../golden_mces_mssql_no_db_with_filter.json | 178 +- .../golden_mces_mssql_to_file.json | 178 +- ...golden_mces_mssql_with_lower_case_urn.json | 2649 ++++++++++++++++- .../procedures/DemoData.Foo.NewProc.json | 57 + .../procedures/demodata.foo.proc2.json | 57 + .../procedures/DemoData.Foo.NewProc.sql | 37 + .../procedures/demodata.foo.proc2.sql | 36 + .../integration/sql_server/setup/setup.sql | 53 +- .../mssql_with_lower_case_urn.yml | 1 - .../integration/sql_server/test_sql_server.py | 54 + .../unit/sql_parsing/test_split_statements.py | 51 + 21 files changed, 4074 insertions(+), 148 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py create mode 100644 metadata-ingestion/src/datahub/sql_parsing/datajob.py create mode 100644 metadata-ingestion/src/datahub/sql_parsing/split_statements.py create mode 100644 metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json create mode 100644 metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json create mode 100644 metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql create 
mode 100644 metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql create mode 100644 metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py index 21e7fad334331..5107a4e38f64d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py @@ -101,6 +101,7 @@ class StoredProcedure: flow: Union[MSSQLJob, MSSQLProceduresContainer] type: str = "STORED_PROCEDURE" source: str = "mssql" + code: Optional[str] = None @property def full_type(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index c19b22a8622ca..9ab9c76c30417 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -24,6 +24,8 @@ platform_name, support_status, ) +from datahub.ingestion.api.source import StructuredLogLevel +from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.mssql.job_models import ( JobStep, @@ -36,6 +38,9 @@ ProcedureParameter, StoredProcedure, ) +from datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( + generate_procedure_lineage, +) from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, SqlWorkUnit, @@ -51,6 +56,7 @@ StringTypeClass, UnionTypeClass, ) +from datahub.utilities.file_backed_collections import FileBackedList logger: logging.Logger = logging.getLogger(__name__) @@ -99,6 +105,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig): default=False, description="Enable to convert the SQL Server assets urns to lowercase", ) + include_lineage: bool 
= Field( + default=True, + description="Enable lineage extraction for stored procedures", + ) @pydantic.validator("uri_args") def passwords_match(cls, v, values, **kwargs): @@ -161,6 +171,7 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self.current_database = None self.table_descriptions: Dict[str, str] = {} self.column_descriptions: Dict[str, str] = {} + self.stored_procedures: FileBackedList[StoredProcedure] = FileBackedList() if self.config.include_descriptions: for inspector in self.get_inspectors(): db_name: str = self.get_db_name(inspector) @@ -374,7 +385,7 @@ def loop_jobs( def loop_job_steps( self, job: MSSQLJob, job_steps: Dict[str, Any] ) -> Iterable[MetadataWorkUnit]: - for step_id, step_data in job_steps.items(): + for _step_id, step_data in job_steps.items(): step = JobStep( job_name=job.formatted_name, step_name=step_data["step_name"], @@ -412,37 +423,44 @@ def loop_stored_procedures( # noqa: C901 if procedures: yield from self.construct_flow_workunits(data_flow=data_flow) for procedure in procedures: - upstream = self._get_procedure_upstream(conn, procedure) - downstream = self._get_procedure_downstream(conn, procedure) - data_job = MSSQLDataJob( - entity=procedure, - ) - # TODO: because of this upstream and downstream are more dependencies, - # can't be used as DataJobInputOutput. - # Should be reorganized into lineage. 
- data_job.add_property("procedure_depends_on", str(upstream.as_property)) - data_job.add_property( - "depending_on_procedure", str(downstream.as_property) - ) - procedure_definition, procedure_code = self._get_procedure_code( - conn, procedure - ) - if procedure_definition: - data_job.add_property("definition", procedure_definition) - if sql_config.include_stored_procedures_code and procedure_code: - data_job.add_property("code", procedure_code) - procedure_inputs = self._get_procedure_inputs(conn, procedure) - properties = self._get_procedure_properties(conn, procedure) - data_job.add_property( - "input parameters", str([param.name for param in procedure_inputs]) - ) - for param in procedure_inputs: - data_job.add_property( - f"parameter {param.name}", str(param.properties) - ) - for property_name, property_value in properties.items(): - data_job.add_property(property_name, str(property_value)) - yield from self.construct_job_workunits(data_job) + yield from self._process_stored_procedure(conn, procedure) + + def _process_stored_procedure( + self, conn: Connection, procedure: StoredProcedure + ) -> Iterable[MetadataWorkUnit]: + upstream = self._get_procedure_upstream(conn, procedure) + downstream = self._get_procedure_downstream(conn, procedure) + data_job = MSSQLDataJob( + entity=procedure, + ) + # TODO: because of this upstream and downstream are more dependencies, + # can't be used as DataJobInputOutput. + # Should be reorganized into lineage. 
+ data_job.add_property("procedure_depends_on", str(upstream.as_property)) + data_job.add_property("depending_on_procedure", str(downstream.as_property)) + procedure_definition, procedure_code = self._get_procedure_code(conn, procedure) + procedure.code = procedure_code + if procedure_definition: + data_job.add_property("definition", procedure_definition) + if procedure_code and self.config.include_stored_procedures_code: + data_job.add_property("code", procedure_code) + procedure_inputs = self._get_procedure_inputs(conn, procedure) + properties = self._get_procedure_properties(conn, procedure) + data_job.add_property( + "input parameters", str([param.name for param in procedure_inputs]) + ) + for param in procedure_inputs: + data_job.add_property(f"parameter {param.name}", str(param.properties)) + for property_name, property_value in properties.items(): + data_job.add_property(property_name, str(property_value)) + if self.config.include_lineage: + # These will be used to construct lineage + self.stored_procedures.append(procedure) + yield from self.construct_job_workunits( + data_job, + # For stored procedure lineage is ingested later + include_lineage=False, + ) @staticmethod def _get_procedure_downstream( @@ -546,8 +564,8 @@ def _get_procedure_code( code_list.append(row["Text"]) if code_slice_text in re.sub(" +", " ", row["Text"].lower()).strip(): code_slice_index = index - definition = "\n".join(code_list[:code_slice_index]) - code = "\n".join(code_list[code_slice_index:]) + definition = "".join(code_list[:code_slice_index]) + code = "".join(code_list[code_slice_index:]) except ResourceClosedError: logger.warning( "Connection was closed from procedure '%s'", @@ -602,16 +620,18 @@ def _get_stored_procedures( def construct_job_workunits( self, data_job: MSSQLDataJob, + include_lineage: bool = True, ) -> Iterable[MetadataWorkUnit]: yield MetadataChangeProposalWrapper( entityUrn=data_job.urn, aspect=data_job.as_datajob_info_aspect, ).as_workunit() - yield 
MetadataChangeProposalWrapper( - entityUrn=data_job.urn, - aspect=data_job.as_datajob_input_output_aspect, - ).as_workunit() + if include_lineage: + yield MetadataChangeProposalWrapper( + entityUrn=data_job.urn, + aspect=data_job.as_datajob_input_output_aspect, + ).as_workunit() # TODO: Add SubType when it appear def construct_flow_workunits( @@ -664,3 +684,58 @@ def get_identifier( if self.config.convert_urns_to_lowercase else qualified_table_name ) + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + yield from super().get_workunits_internal() + + # This is done at the end so that we will have access to tables + # from all databases in schema_resolver and discovered_tables + for procedure in self.stored_procedures: + with self.report.report_exc( + message="Failed to parse stored procedure lineage", + context=procedure.full_name, + level=StructuredLogLevel.WARN, + ): + yield from auto_workunit( + generate_procedure_lineage( + schema_resolver=self.schema_resolver, + procedure=procedure, + procedure_job_urn=MSSQLDataJob(entity=procedure).urn, + is_temp_table=self.is_temp_table, + ) + ) + + def is_temp_table(self, name: str) -> bool: + try: + parts = name.split(".") + table_name = parts[-1] + schema_name = parts[-2] + db_name = parts[-3] + + if table_name.startswith("#"): + return True + + # This is also a temp table if + # 1. this name would be allowed by the dataset patterns, and + # 2. we have a list of discovered tables, and + # 3. 
it's not in the discovered tables list + if ( + self.config.database_pattern.allowed(db_name) + and self.config.schema_pattern.allowed(schema_name) + and self.config.table_pattern.allowed(name) + and self.standardize_identifier_case(name) + not in self.discovered_datasets + ): + logger.debug(f"inferred as temp table {name}") + return True + + except Exception: + logger.warning(f"Error parsing table name {name} ") + return False + + def standardize_identifier_case(self, table_ref_str: str) -> str: + return ( + table_ref_str.lower() + if self.config.convert_urns_to_lowercase + else table_ref_str + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py new file mode 100644 index 0000000000000..b979a270a5528 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -0,0 +1,84 @@ +import logging +from typing import Callable, Iterable, Optional + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure +from datahub.metadata.schema_classes import DataJobInputOutputClass +from datahub.sql_parsing.datajob import to_datajob_input_output +from datahub.sql_parsing.schema_resolver import SchemaResolver +from datahub.sql_parsing.split_statements import split_statements +from datahub.sql_parsing.sql_parsing_aggregator import ( + ObservedQuery, + SqlParsingAggregator, +) + +logger = logging.getLogger(__name__) + + +def parse_procedure_code( + *, + schema_resolver: SchemaResolver, + default_db: Optional[str], + default_schema: Optional[str], + code: str, + is_temp_table: Callable[[str], bool], + raise_: bool = False, +) -> Optional[DataJobInputOutputClass]: + aggregator = SqlParsingAggregator( + platform=schema_resolver.platform, + env=schema_resolver.env, + schema_resolver=schema_resolver, + generate_lineage=True, + 
generate_queries=False, + generate_usage_statistics=False, + generate_operations=False, + generate_query_subject_fields=False, + generate_query_usage_statistics=False, + is_temp_table=is_temp_table, + ) + for query in split_statements(code): + # TODO: We should take into account `USE x` statements. + aggregator.add_observed_query( + observed=ObservedQuery( + default_db=default_db, + default_schema=default_schema, + query=query, + ) + ) + if aggregator.report.num_observed_queries_failed and raise_: + logger.info(aggregator.report.as_string()) + raise ValueError( + f"Failed to parse {aggregator.report.num_observed_queries_failed} queries." + ) + + mcps = list(aggregator.gen_metadata()) + return to_datajob_input_output( + mcps=mcps, + ignore_extra_mcps=True, + ) + + +# Is procedure handling generic enough to be added to SqlParsingAggregator? +def generate_procedure_lineage( + *, + schema_resolver: SchemaResolver, + procedure: StoredProcedure, + procedure_job_urn: str, + is_temp_table: Callable[[str], bool] = lambda _: False, + raise_: bool = False, +) -> Iterable[MetadataChangeProposalWrapper]: + if procedure.code: + datajob_input_output = parse_procedure_code( + schema_resolver=schema_resolver, + default_db=procedure.db, + default_schema=procedure.schema, + code=procedure.code, + is_temp_table=is_temp_table, + raise_=raise_, + ) + + if datajob_input_output: + yield MetadataChangeProposalWrapper( + entityUrn=procedure_job_urn, + aspect=datajob_input_output, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index e5779791ed412..ae6116326da33 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -392,6 +392,7 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) platform_instance=self.config.platform_instance, env=self.config.env, ) + 
self.discovered_datasets: Set[str] = set() self._view_definition_cache: MutableMapping[str, str] if self.config.use_file_backed_cache: self._view_definition_cache = FileBackedDict[str]() @@ -831,8 +832,9 @@ def _process_table( self._classify(dataset_name, schema, table, data_reader, schema_metadata) dataset_snapshot.aspects.append(schema_metadata) - if self.config.include_view_lineage: + if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.discovered_datasets.add(dataset_name) db_name = self.get_db_name(inspector) yield from self.add_table_to_schema_container( @@ -1126,8 +1128,9 @@ def _process_view( columns, canonical_schema=schema_fields, ) - if self.config.include_view_lineage: + if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.discovered_datasets.add(dataset_name) description, properties, _ = self.get_table_properties(inspector, schema, view) try: view_definition = inspector.get_view_definition(view, schema) @@ -1190,6 +1193,11 @@ def _process_view( domain_registry=self.domain_registry, ) + def _save_schema_to_resolver(self): + return self.config.include_view_lineage or ( + hasattr(self.config, "include_lineage") and self.config.include_lineage + ) + def _run_sql_parser( self, view_identifier: str, query: str, schema_resolver: SchemaResolver ) -> Optional[SqlParsingResult]: diff --git a/metadata-ingestion/src/datahub/sql_parsing/datajob.py b/metadata-ingestion/src/datahub/sql_parsing/datajob.py new file mode 100644 index 0000000000000..215b207c3dcf5 --- /dev/null +++ b/metadata-ingestion/src/datahub/sql_parsing/datajob.py @@ -0,0 +1,50 @@ +import logging +from typing import Iterable, List, Optional + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + DataJobInputOutputClass, + FineGrainedLineageClass, + UpstreamLineageClass, +) + +logger = logging.getLogger(__name__) + + +def 
to_datajob_input_output( + *, mcps: Iterable[MetadataChangeProposalWrapper], ignore_extra_mcps: bool = True +) -> Optional[DataJobInputOutputClass]: + inputDatasets: List[str] = [] + outputDatasets: List[str] = [] + fineGrainedLineages: List[FineGrainedLineageClass] = [] + for mcp in mcps: + # TODO: Represent simple write operations without lineage as outputDatasets. + + upstream_lineage = mcp.as_workunit().get_aspect_of_type(UpstreamLineageClass) + if upstream_lineage is not None: + if mcp.entityUrn and mcp.entityUrn not in outputDatasets: + outputDatasets.append(mcp.entityUrn) + + for upstream in upstream_lineage.upstreams: + if upstream.dataset not in inputDatasets: + inputDatasets.append(upstream.dataset) + + if upstream_lineage.fineGrainedLineages: + for fineGrainedLineage in upstream_lineage.fineGrainedLineages: + fineGrainedLineages.append(fineGrainedLineage) + + elif ignore_extra_mcps: + pass + else: + raise ValueError( + f"Expected an upstreamLineage aspect, got {mcp.aspectName} for {mcp.entityUrn}" + ) + + if not inputDatasets and not outputDatasets: + return None + + return DataJobInputOutputClass( + inputDatasets=inputDatasets, + outputDatasets=outputDatasets, + fineGrainedLineages=fineGrainedLineages, + ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/query_types.py b/metadata-ingestion/src/datahub/sql_parsing/query_types.py index 2acad19418c11..802fb3e993f42 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/query_types.py +++ b/metadata-ingestion/src/datahub/sql_parsing/query_types.py @@ -14,7 +14,16 @@ def _is_temp_table(table: sqlglot.exp.Table, dialect: sqlglot.Dialect) -> bool: identifier: sqlglot.exp.Identifier = table.this return identifier.args.get("temporary") or ( - is_dialect_instance(dialect, "redshift") and identifier.name.startswith("#") + # These dialects use # as a prefix for temp tables. + is_dialect_instance( + dialect, + [ + "redshift", + "mssql", + # sybase is another one, but we don't support that dialect yet. 
+ ], + ) + and identifier.name.startswith("#") ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py new file mode 100644 index 0000000000000..42dda4e62158b --- /dev/null +++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py @@ -0,0 +1,163 @@ +import re +from enum import Enum +from typing import Generator, List, Tuple + +CONTROL_FLOW_KEYWORDS = [ + "GO", + r"BEGIN\w+TRY", + r"BEGIN\w+CATCH", + "BEGIN", + r"END\w+TRY", + r"END\w+CATCH", + "END", +] + +# There's an exception to this rule, which is when the statement +# is preceded by a CTE. +FORCE_NEW_STATEMENT_KEYWORDS = [ + # SELECT is used inside queries as well, so we can't include it here. + "INSERT", + "UPDATE", + "DELETE", + "MERGE", +] + + +class ParserState(Enum): + NORMAL = 1 + STRING = 2 + COMMENT = 3 + MULTILINE_COMMENT = 4 + + +def _is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: + """ + Check if a keyword exists at the given position using regex word boundaries. + """ + if pos + len(keyword) > len(sql): + return False + + # If we're not at a word boundary, we can't generate a keyword. + if pos > 0 and not ( + bool(re.match(r"\w\W", sql[pos - 1 : pos + 1])) + or bool(re.match(r"\W\w", sql[pos - 1 : pos + 1])) + ): + return False + + pattern = rf"^{re.escape(keyword)}\b" + match = re.match(pattern, sql[pos:], re.IGNORECASE) + return bool(match) + + +def _look_ahead_for_keywords( + sql: str, pos: int, keywords: List[str] +) -> Tuple[bool, str, int]: + """ + Look ahead for SQL keywords at the current position. + """ + + for keyword in keywords: + if _is_keyword_at_position(sql, pos, keyword): + return True, keyword, len(keyword) + return False, "", 0 + + +def split_statements(sql: str) -> Generator[str, None, None]: + """ + Split T-SQL code into individual statements, handling various SQL constructs. 
+ """ + if not sql or not sql.strip(): + return + + current_statement: List[str] = [] + state = ParserState.NORMAL + i = 0 + + def yield_if_complete() -> Generator[str, None, None]: + statement = "".join(current_statement).strip() + if statement: + yield statement + current_statement.clear() + + prev_real_char = "\0" # the most recent non-whitespace, non-comment character + while i < len(sql): + c = sql[i] + next_char = sql[i + 1] if i < len(sql) - 1 else "\0" + + if state == ParserState.NORMAL: + if c == "'": + state = ParserState.STRING + current_statement.append(c) + prev_real_char = c + elif c == "-" and next_char == "-": + state = ParserState.COMMENT + current_statement.append(c) + current_statement.append(next_char) + i += 1 + elif c == "/" and next_char == "*": + state = ParserState.MULTILINE_COMMENT + current_statement.append(c) + current_statement.append(next_char) + i += 1 + else: + most_recent_real_char = prev_real_char + if not c.isspace(): + prev_real_char = c + + is_control_keyword, keyword, keyword_len = _look_ahead_for_keywords( + sql, i, keywords=CONTROL_FLOW_KEYWORDS + ) + if is_control_keyword: + # Yield current statement if any + yield from yield_if_complete() + # Yield keyword as its own statement + yield keyword + i += keyword_len + continue + + ( + is_force_new_statement_keyword, + keyword, + keyword_len, + ) = _look_ahead_for_keywords( + sql, i, keywords=FORCE_NEW_STATEMENT_KEYWORDS + ) + if ( + is_force_new_statement_keyword and most_recent_real_char != ")" + ): # usually we'd have a close paren that closes a CTE + # Force termination of current statement + yield from yield_if_complete() + + current_statement.append(keyword) + i += keyword_len + continue + + elif c == ";": + yield from yield_if_complete() + else: + current_statement.append(c) + + elif state == ParserState.STRING: + current_statement.append(c) + if c == "'" and next_char == "'": + current_statement.append(next_char) + i += 1 + elif c == "'": + state = ParserState.NORMAL + + 
elif state == ParserState.COMMENT: + current_statement.append(c) + if c == "\n": + state = ParserState.NORMAL + + elif state == ParserState.MULTILINE_COMMENT: + current_statement.append(c) + if c == "*" and next_char == "/": + current_statement.append(next_char) + i += 1 + state = ParserState.NORMAL + + i += 1 + + # Handle the last statement + yield from yield_if_complete() diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 360ccd7bf3507..44f0d7be7aad9 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -762,7 +762,6 @@ def add_observed_query( This assumes that queries come in order of increasing timestamps. """ - self.report.num_observed_queries += 1 # All queries with no session ID are assumed to be part of the same session. diff --git a/metadata-ingestion/tests/integration/sql_server/docker-compose.yml b/metadata-ingestion/tests/integration/sql_server/docker-compose.yml index 1046321e4f720..aed70503903c0 100644 --- a/metadata-ingestion/tests/integration/sql_server/docker-compose.yml +++ b/metadata-ingestion/tests/integration/sql_server/docker-compose.yml @@ -1,7 +1,7 @@ version: "3" services: testsqlserver: - image: "mcr.microsoft.com/mssql/server:latest" + image: "mcr.microsoft.com/mssql/server:2022-latest" platform: linux/amd64 container_name: "testsqlserver" environment: diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 4302c41140dc6..54821347fd28b 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ 
"aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + 
"isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -4256,6 +4398,159 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + 
}, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "NewData.FooNew.View1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + 
} + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", @@ -4611,6 +4906,55 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),firstname)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),firstname)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),lastname)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),lastname)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", @@ -4643,6 +4987,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -4690,5 +5050,21 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 0a50556edc638..3836e587ef8cf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", 
+ "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE 
[Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + 
"externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2571,6 +2713,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 0a50556edc638..3836e587ef8cf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, 
+ "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM 
Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2571,6 +2713,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 0279a94084ce5..ebcadcc11dcbf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": 
"01afcab8-187c-459f-828e-727196a1832d", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "demodata.foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": 
true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,24 @@ }, { "entityType": "dataJob", - "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "dataJobInfo", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2515,68 +2657,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": 
"containerProperties", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" - ], - "confidenceScore": 1.0 - } - ] + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData" + }, + "name": "NewData", + "env": "PROD" } }, "systemMetadata": { @@ -2586,8 +2679,8 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityType": "container", + "entityUrn": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2602,8 +2695,2418 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_accessadmin" + }, + "name": 
"db_accessadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_backupoperator" + }, + "name": "db_backupoperator", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datareader" + }, + "name": "db_datareader", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datawriter" + }, + "name": "db_datawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_ddladmin" + }, + "name": "db_ddladmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatareader" + }, + "name": "db_denydatareader", + "env": "PROD" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatawriter" + }, + "name": "db_denydatawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, 
+{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_owner" + }, + "name": "db_owner", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_securityadmin" + }, + "name": "db_securityadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "dbo" + }, + "name": "dbo", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ProductsNew", + "tags": [] + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.dbo.productsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ProductName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "MONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": 
"urn:li:container:269d0067d130eda0399a534fc787054c", + "urn": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "FooNew" + }, + "name": "FooNew", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + 
"typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ItemsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.itemsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ItemName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLMONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "PersonsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.personsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.view1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "guest" + }, + "name": "guest", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "INFORMATION_SCHEMA" + }, + "name": "INFORMATION_SCHEMA", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + 
"removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": 
"sys" + }, + "name": "sys", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),FirstName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),FirstName)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),LastName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),LastName)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),Age)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),TempID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),Name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "confidenceScore": 0.35 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + 
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json new file mode 100644 index 0000000000000..609e3a6f42945 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json @@ -0,0 +1,57 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, 
+ { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json new file mode 100644 index 0000000000000..8ebd1c065ebf9 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json @@ -0,0 +1,57 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,demodata.foo.stored_procedures,PROD),proc2)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql new file mode 100644 index 0000000000000..52a8d1327653b --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql @@ -0,0 +1,37 @@ +CREATE PROCEDURE [Foo].[NewProc] + AS + BEGIN + --insert into items table from salesreason table + insert into Foo.Items (ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; + + + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END + + SELECT * INTO #TempTable FROM NewData.FooNew.PersonsNew + + UPDATE DemoData.Foo.Persons + SET Age = t.Age + FROM DemoData.Foo.Persons p + JOIN #TempTable t ON p.ID = t.ID + + END \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql new 
file mode 100644 index 0000000000000..69194a8d2c546 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql @@ -0,0 +1,36 @@ +CREATE PROCEDURE [foo].[proc2] + AS + BEGIN + --insert into items table from salesreason table + insert into foo.items (id, itemame) + SELECT tempid, name + FROM foo.salesreason; + + + IF OBJECT_ID('foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT age, COUNT(*) as count + INTO foo.age_dist + FROM foo.persons + GROUP BY age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE foo.age_dist; + + INSERT INTO foo.age_dist (age, count) + SELECT age, COUNT(*) as count + FROM foo.persons + GROUP BY age + END + + SELECT * INTO #temptable FROM newdata.foonew.personsnew + + UPDATE demodata.foo.persons + SET age = t.age + FROM demodata.foo.persons p + JOIN #temptable t ON p.ID = t.ID + END \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index f495db3b91cfa..0c3c7ee2fd29e 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -1,3 +1,4 @@ +DROP DATABASE IF EXISTS NewData; CREATE DATABASE NewData; GO USE NewData; @@ -14,7 +15,14 @@ CREATE TABLE FooNew.PersonsNew ( FirstName varchar(255), Age int ); +GO +CREATE VIEW FooNew.View1 AS +SELECT LastName, FirstName +FROM FooNew.PersonsNew +WHERE Age > 18 +GO +DROP DATABASE IF EXISTS DemoData; CREATE DATABASE DemoData; GO USE DemoData; @@ -47,11 +55,54 @@ CREATE TABLE Foo.SalesReason ) ; GO +DROP PROCEDURE IF EXISTS [Foo].[Proc.With.SpecialChar]; +GO CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT AS SELECT @ID AS ThatDB; GO +DROP PROCEDURE IF EXISTS [Foo].[NewProc]; +GO +CREATE PROCEDURE [Foo].[NewProc] + AS + BEGIN + --insert into items table from salesreason table + insert into Foo.Items 
(ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; + + + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END + + SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew + + UPDATE DemoData.Foo.Persons + SET Age = t.Age + FROM DemoData.Foo.Persons p + JOIN #TEMPTABLE t ON p.ID = t.ID + + END +GO + +EXEC Foo.NewProc GO EXEC sys.sp_addextendedproperty @name = N'MS_Description', @@ -93,4 +144,4 @@ EXEC sp_attach_schedule GO EXEC dbo.sp_add_jobserver @job_name = N'Weekly Demo Data Backup' -GO +GO \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml index ff1179034833f..94128810f026b 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml @@ -5,7 +5,6 @@ source: config: username: sa password: test!Password - database: DemoData host_port: localhost:21433 convert_urns_to_lowercase: true # use_odbc: True diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 1f418ffbd32ea..b969f77b4c3c1 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -1,9 +1,16 @@ import os +import pathlib import subprocess import time +from pathlib import Path import pytest +from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure +from 
datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( + generate_procedure_lineage, +) +from datahub.sql_parsing.schema_resolver import SchemaResolver from tests.test_helpers import mce_helpers from tests.test_helpers.click_helpers import run_datahub_cmd from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port @@ -57,3 +64,50 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['date_modified'\]", ], ) + + +PROCEDURE_SQLS_DIR = pathlib.Path(__file__).parent / "procedures" +PROCEDURES_GOLDEN_DIR = pathlib.Path(__file__).parent / "golden_files/procedures/" +procedure_sqls = [sql_file.name for sql_file in PROCEDURE_SQLS_DIR.iterdir()] + + +@pytest.mark.parametrize("procedure_sql_file", procedure_sqls) +@pytest.mark.integration +def test_stored_procedure_lineage( + pytestconfig: pytest.Config, procedure_sql_file: str +) -> None: + sql_file_path = PROCEDURE_SQLS_DIR / procedure_sql_file + procedure_code = sql_file_path.read_text() + + # Procedure file is named as <db>.<schema>.<procedure_name> 
+ splits = procedure_sql_file.split(".") + db = splits[0] + schema = splits[1] + name = splits[2] + + procedure = StoredProcedure( + db=db, + schema=schema, + name=name, + flow=None, # type: ignore # flow is not used in this test + code=procedure_code, + ) + data_job_urn = f"urn:li:dataJob:(urn:li:dataFlow:(mssql,{db}.{schema}.stored_procedures,PROD),{name})" + + schema_resolver = SchemaResolver(platform="mssql") + + mcps = list( + generate_procedure_lineage( + schema_resolver=schema_resolver, + procedure=procedure, + procedure_job_urn=data_job_urn, + is_temp_table=lambda name: "temp" in name.lower(), + ) + ) + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=( + PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).with_suffix(".json") + ), + ) diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py new file mode 100644 index 0000000000000..06e0e84ede554 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py @@ -0,0 +1,51 @@ +from datahub.sql_parsing.split_statements import split_statements + + +def test_split_statements_complex() -> None: + test_sql = """ + CREATE TABLE Users (Id INT); + -- Comment here + INSERT INTO Users VALUES (1); + BEGIN + UPDATE Users SET Id = 2; + /* Multi-line + comment */ + DELETE FROM /* inline DELETE comment */ Users; + END + GO + SELECT * FROM Users + """ + + statements = [statement.strip() for statement in split_statements(test_sql)] + assert statements == [ + "CREATE TABLE Users (Id INT)", + "-- Comment here", + "INSERT INTO Users VALUES (1)", + "BEGIN", + "UPDATE Users SET Id = 2", + "/* Multi-line\n comment */", + "DELETE FROM /* inline DELETE comment */ Users", + "END", + "GO", + "SELECT * FROM Users", + ] + + +def test_split_statements_cte() -> None: + # SQL example from https://stackoverflow.com/a/11562724 + test_sql = """\ +WITH T AS +( SELECT InvoiceNumber, + DocTotal, + 
SUM(Sale + VAT) OVER(PARTITION BY InvoiceNumber) AS NewDocTotal + FROM PEDI_InvoiceDetail +) +-- comment +/* multi-line +comment */ +UPDATE T +SET DocTotal = NewDocTotal""" + statements = [statement.strip() for statement in split_statements(test_sql)] + assert statements == [ + test_sql, + ]