From d36edcace9f8525a80859c0a7674446779e0727b Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:53:23 -0500 Subject: [PATCH 1/3] test(smoke-test): updates to smoke-tests (#11152) --- docker/profiles/docker-compose.frontend.yml | 1 + docker/profiles/docker-compose.gms.yml | 1 + .../profiles/docker-compose.prerequisites.yml | 2 +- smoke-test/build.gradle | 35 +++++++++---------- smoke-test/cypress-dev.sh | 6 ++-- smoke-test/run-quickstart.sh | 6 ++-- smoke-test/set-cypress-creds.sh | 2 +- smoke-test/set-test-env-vars.sh | 3 ++ smoke-test/smoke.sh | 15 +++++--- smoke-test/test_e2e.py | 14 ++++++-- smoke-test/tests/consistency_utils.py | 27 +++++++++++--- 11 files changed, 76 insertions(+), 36 deletions(-) create mode 100644 smoke-test/set-test-env-vars.sh diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index b43db8297cb1e..b5b2d50143927 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -10,6 +10,7 @@ x-datahub-frontend-service: &datahub-frontend-service - ${DATAHUB_LOCAL_FRONTEND_ENV:-empty2.env} environment: &datahub-frontend-service-env KAFKA_BOOTSTRAP_SERVER: broker:29092 + DATAHUB_GMS_HOST: ${DATAHUB_GMS_HOST:-datahub-gms} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 8cfff2280e2fe..c9448fa34c687 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -40,6 +40,7 @@ x-kafka-env: &kafka-env # KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 SCHEMA_REGISTRY_TYPE: INTERNAL KAFKA_SCHEMAREGISTRY_URL: http://datahub-gms:8080/schema-registry/api/ + SPRING_KAFKA_CONSUMER_AUTO_OFFSET_RESET: ${SPRING_KAFKA_CONSUMER_AUTO_OFFSET_RESET:-earliest} x-datahub-quickstart-telemetry-env: &datahub-quickstart-telemetry-env DATAHUB_SERVER_TYPE: ${DATAHUB_SERVER_TYPE:-quickstart} diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index 7cd9c9039539c..eed23a749628f 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -234,7 +234,7 @@ services: env_file: kafka-broker/env/docker.env environment: KAFKA_NODE_ID: 1 - KAFKA_ADVERTISED_LISTENERS: BROKER://broker:29092,EXTERNAL://broker:9092 + KAFKA_ADVERTISED_LISTENERS: BROKER://broker:29092,EXTERNAL://localhost:9092 KAFKA_LISTENERS: BROKER://broker:29092,EXTERNAL://broker:9092,CONTROLLER://broker:39092 KAFKA_INTER_BROKER_LISTENER_NAME: BROKER KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 95f3ba8ed56d6..a9e5a8942b71e 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -44,12 +44,19 @@ task yarnInstall(type: YarnTask) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] } + task cypressLint(type: YarnTask, dependsOn: yarnInstall) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] // TODO: Run a full lint instead of just format. args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format'] } +task cypressLintFix(type: YarnTask, dependsOn: yarnInstall) { + environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] + // TODO: Run a full lint instead of just format. + args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format', '--write'] +} + task installDev(type: Exec) { inputs.file file('pyproject.toml') inputs.file file('requirements.txt') @@ -86,10 +93,7 @@ task pythonLintFix(type: Exec, dependsOn: installDev) { */ task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'no_cypress_suite0' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -99,10 +103,7 @@ task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:in task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'no_cypress_suite1' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -112,10 +113,7 @@ task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:in task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'cypress_suite1' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -125,10 +123,7 @@ task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:inst task cypressRest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'TEST_STRATEGY', 'cypress_rest' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -141,9 +136,6 @@ task cypressRest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:instal */ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', @@ -156,13 +148,18 @@ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:install */ task cypressData(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' - environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' environment 'RUN_UI', 'false' - environment "ELASTIC_ID_HASH_ALGO", "MD5" workingDir = project.projectDir commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "./cypress-dev.sh" -} \ No newline at end of file +} + +task lint { + dependsOn pythonLint, cypressLint +} + +task lintFix { + dependsOn pythonLintFix +} diff --git a/smoke-test/cypress-dev.sh b/smoke-test/cypress-dev.sh index 59346b2606905..bce2d794b1869 100755 --- a/smoke-test/cypress-dev.sh +++ b/smoke-test/cypress-dev.sh @@ -10,9 +10,9 @@ fi source venv/bin/activate -export KAFKA_BROKER_CONTAINER="datahub-kafka-broker-1" -export KAFKA_BOOTSTRAP_SERVER="broker:9092" -export ELASTIC_ID_HASH_ALGO="MD5" +# set environment variables for the test +source ./set-test-env-vars.sh + python -c 'from tests.cypress.integration_test import ingest_data; ingest_data()' cd tests/cypress diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh index 1923d42eb5e93..2bf5cdf8ca9c4 100755 --- a/smoke-test/run-quickstart.sh +++ b/smoke-test/run-quickstart.sh @@ -10,6 +10,7 @@ source venv/bin/activate mkdir -p ~/.datahub/plugins/frontend/auth/ echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props +echo "DATAHUB_VERSION = $DATAHUB_VERSION" DATAHUB_SEARCH_IMAGE="${DATAHUB_SEARCH_IMAGE:=opensearchproject/opensearch}" DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}" XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}" @@ -17,10 +18,11 @@ ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}" USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}" ELASTIC_ID_HASH_ALGO="${ELASTIC_ID_HASH_ALGO:=MD5}" -echo "DATAHUB_VERSION = $DATAHUB_VERSION" + DATAHUB_TELEMETRY_ENABLED=false \ DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \ DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" DATAHUB_SEARCH_TAG="$DATAHUB_SEARCH_TAG" \ XPACK_SECURITY_ENABLED="$XPACK_SECURITY_ENABLED" ELASTICSEARCH_USE_SSL="$ELASTICSEARCH_USE_SSL" \ USE_AWS_ELASTICSEARCH="$USE_AWS_ELASTICSEARCH" \ -datahub docker quickstart --version ${DATAHUB_VERSION} --standalone_consumers --dump-logs-on-failure --kafka-setup +DATAHUB_VERSION=${DATAHUB_VERSION} \ +docker compose --project-directory ../docker/profiles --profile quickstart-consumers up -d --quiet-pull --wait --wait-timeout 900 diff --git a/smoke-test/set-cypress-creds.sh b/smoke-test/set-cypress-creds.sh index 82fe736b0a7e1..fc6e7dd42f5de 100644 --- a/smoke-test/set-cypress-creds.sh +++ b/smoke-test/set-cypress-creds.sh @@ -2,4 +2,4 @@ export CYPRESS_ADMIN_USERNAME=${ADMIN_USERNAME:-datahub} export CYPRESS_ADMIN_PASSWORD=${ADMIN_PASSWORD:-datahub} -export CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub} \ No newline at end of file +export CYPRESS_ADMIN_DISPLAYNAME=${ADMIN_DISPLAYNAME:-DataHub} diff --git a/smoke-test/set-test-env-vars.sh b/smoke-test/set-test-env-vars.sh new file mode 100644 index 0000000000000..dee3af2b68747 --- /dev/null +++ b/smoke-test/set-test-env-vars.sh @@ -0,0 +1,3 @@ +export DATAHUB_KAFKA_SCHEMA_REGISTRY_URL=http://localhost:8080/schema-registry/api +export DATAHUB_GMS_URL=http://localhost:8080 +export ELASTIC_ID_HASH_ALGO="MD5" \ No newline at end of file diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index c16865fe1e71e..5b3e8a9377a6c 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ -16,16 +16,23 @@ cd "$DIR" if [ "${RUN_QUICKSTART:-true}" == "true" ]; then source ./run-quickstart.sh +else + mkdir -p ~/.datahub/plugins/frontend/auth/ + echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props + echo "datahub:datahub" > ~/.datahub/plugins/frontend/auth/user.props + + python3 -m venv venv + source venv/bin/activate + python -m pip install --upgrade pip uv>=0.1.10 wheel setuptools + uv pip install -r requirements.txt fi -source venv/bin/activate - (cd ..; ./gradlew :smoke-test:yarnInstall) source ./set-cypress-creds.sh -export DATAHUB_GMS_URL=http://localhost:8080 -export ELASTIC_ID_HASH_ALGO="MD5" +# set environment variables for the test +source ./set-test-env-vars.sh # no_cypress_suite0, no_cypress_suite1, cypress_suite1, cypress_rest if [[ -z "${TEST_STRATEGY}" ]]; then diff --git a/smoke-test/test_e2e.py b/smoke-test/test_e2e.py index abb4841314c4a..74d64a8193173 100644 --- a/smoke-test/test_e2e.py +++ b/smoke-test/test_e2e.py @@ -21,6 +21,7 @@ get_frontend_session, get_admin_credentials, get_root_urn, + wait_for_writes_to_sync, ) bootstrap_sample_data = "../metadata-ingestion/examples/mce_files/bootstrap_mce.json" @@ -150,11 +151,13 @@ def _ensure_group_not_present(urn: str, frontend_session) -> Any: def test_ingestion_via_rest(wait_for_healthchecks): ingest_file_via_rest(bootstrap_sample_data) _ensure_user_present(urn=get_root_urn()) + wait_for_writes_to_sync() @pytest.mark.dependency(depends=["test_healthchecks"]) def test_ingestion_usage_via_rest(wait_for_healthchecks): ingest_file_via_rest(usage_sample_data) + wait_for_writes_to_sync() @pytest.mark.dependency(depends=["test_healthchecks"]) @@ -185,6 +188,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): # Since Kafka emission is asynchronous, we must wait a little bit so that # the changes are actually processed. time.sleep(kafka_post_ingestion_wait_sec) + wait_for_writes_to_sync() @pytest.mark.dependency( @@ -196,6 +200,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): ) def test_run_ingestion(wait_for_healthchecks): # Dummy test so that future ones can just depend on this one. + wait_for_writes_to_sync() pass @@ -1384,7 +1389,9 @@ def test_native_user_endpoints(frontend_session): unauthenticated_get_invite_token_response = unauthenticated_session.post( f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json ) - assert unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED + assert ( + unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED + ) unauthenticated_create_reset_token_json = { "query": """mutation createNativeUserResetToken($input: CreateNativeUserResetTokenInput!) {\n @@ -1399,7 +1406,10 @@ def test_native_user_endpoints(frontend_session): f"{get_frontend_url()}/api/v2/graphql", json=unauthenticated_create_reset_token_json, ) - assert unauthenticated_create_reset_token_response.status_code == HTTPStatus.UNAUTHORIZED + assert ( + unauthenticated_create_reset_token_response.status_code + == HTTPStatus.UNAUTHORIZED + ) # cleanup steps json = { diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 4335e2a874c1e..1eddc46bb220b 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -8,14 +8,31 @@ ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int( os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5) ) -KAFKA_BROKER_CONTAINER: str = str( - os.getenv("KAFKA_BROKER_CONTAINER", "datahub-broker-1") -) KAFKA_BOOTSTRAP_SERVER: str = str(os.getenv("KAFKA_BOOTSTRAP_SERVER", "broker:29092")) logger = logging.getLogger(__name__) +def infer_kafka_broker_container() -> str: + cmd = "docker ps --format '{{.Names}}' | grep broker" + completed_process = subprocess.run( + cmd, + capture_output=True, + shell=True, + text=True, + ) + result = str(completed_process.stdout) + lines = result.splitlines() + if len(lines) == 0: + raise ValueError("No Kafka broker containers found") + return lines[0] + + +KAFKA_BROKER_CONTAINER: str = str( + os.getenv("KAFKA_BROKER_CONTAINER", infer_kafka_broker_container()) +) + + def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if USE_STATIC_SLEEP: time.sleep(ELASTICSEARCH_REFRESH_INTERVAL_SECONDS) @@ -44,7 +61,9 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if maximum_lag == 0: lag_zero = True except ValueError: - logger.warning(f"Error reading kafka lag using command: {cmd}") + logger.warning( + f"Error reading kafka lag using command: {cmd}", exc_info=True + ) if not lag_zero: logger.warning( From 897173f270e780e08f936b219ee156fd3ca5a8db Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 13 Aug 2024 13:54:50 -0700 Subject: [PATCH 2/3] feat(dbt): support prefer_sql_parser_lineage with sources enabled (#11168) --- .../ingestion/source/dbt/dbt_common.py | 43 +- ...test_prefer_sql_parser_lineage_golden.json | 3031 ++++++++++++++++- .../tests/integration/dbt/test_dbt.py | 2 +- .../tests/unit/test_dbt_source.py | 1 - 4 files changed, 3022 insertions(+), 55 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index e2b5f8378732c..d2b41323e5115 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -276,6 +276,12 @@ class DBTCommonConfig( DBTEntitiesEnabled(), description="Controls for enabling / disabling metadata emission for different dbt entities (models, test definitions, test results, etc.)", ) + prefer_sql_parser_lineage: bool = Field( + default=False, + description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " + "This can be useful when dbt models reference tables directly, instead of using the ref() macro. " + "This requires that `skip_sources_in_lineage` is enabled.", + ) skip_sources_in_lineage: bool = Field( default=False, description="[Experimental] When enabled, dbt sources will not be included in the lineage graph. " @@ -366,13 +372,6 @@ class DBTCommonConfig( description="When enabled, includes the compiled code in the emitted metadata.", ) - prefer_sql_parser_lineage: bool = Field( - default=False, - description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " - "This can be useful when dbt models reference tables directly, instead of using the ref() macro. " - "This requires that `skip_sources_in_lineage` is enabled.", - ) - @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: if target_platform.lower() == DBT_PLATFORM: @@ -438,15 +437,27 @@ def validate_include_column_lineage( return include_column_lineage - @validator("skip_sources_in_lineage") + @validator("skip_sources_in_lineage", always=True) def validate_skip_sources_in_lineage( cls, skip_sources_in_lineage: bool, values: Dict ) -> bool: - entites_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled") + entities_enabled: Optional[DBTEntitiesEnabled] = values.get("entities_enabled") + prefer_sql_parser_lineage: Optional[bool] = values.get( + "prefer_sql_parser_lineage" + ) + + if prefer_sql_parser_lineage and not skip_sources_in_lineage: + raise ValueError( + "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." + ) + if ( skip_sources_in_lineage - and entites_enabled - and entites_enabled.sources == EmitDirective.YES + and entities_enabled + and entities_enabled.sources == EmitDirective.YES + # When `prefer_sql_parser_lineage` is enabled, it's ok to have `skip_sources_in_lineage` enabled + # without also disabling sources. + and not prefer_sql_parser_lineage ): raise ValueError( "When `skip_sources_in_lineage` is enabled, `entities_enabled.sources` must be set to NO." @@ -454,16 +465,6 @@ def validate_skip_sources_in_lineage( return skip_sources_in_lineage - @validator("prefer_sql_parser_lineage") - def validate_prefer_sql_parser_lineage( - cls, prefer_sql_parser_lineage: bool, values: Dict - ) -> bool: - if prefer_sql_parser_lineage and not values.get("skip_sources_in_lineage"): - raise ValueError( - "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." - ) - return prefer_sql_parser_lineage - @dataclass class DBTColumn: diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json index 81754fd6cbcac..d421fc4ba42f5 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -638,8 +638,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -659,8 +659,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1097,8 +1097,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1118,8 +1118,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1420,8 +1420,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1441,8 +1441,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1944,8 +1944,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1965,8 +1965,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1982,6 +1982,2973 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in dev", + "owner": "@alice", + "some_other_property": "test 1", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.actor", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "actor", + "description": "description for actor table from dbt", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@alice", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.actor", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759273000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "actor_id", + "nullable": false, + "description": "description for actor_id column from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "description": "dbt comment: Actors column \u2013 from postgres\n\ndbt model description: description for first_name from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "description": "description for last_name from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "description": "description for last_update from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD),actor_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD),actor_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD),first_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD),first_name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD),last_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD),last_name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD),last_update)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.address", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "address", + "description": "a user's address", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.address", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759930000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address2", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "district", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),address2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),address_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),city_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),city_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),district)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),district)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),last_update)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.category", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "category", + "description": "a user's category", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.category", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759987000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "category_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD),category_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD),category_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD),last_update)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.category,PROD),name)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.city", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "city", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.city", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759925000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "country_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD),city_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD),country_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),country_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),last_update)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in prod", + "owner": "@bob", + "some_other_property": "test 2", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.country", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "country", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@bob", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.country", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581759840000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "country", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "country_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD),country_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD),country_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.country,PROD),last_update)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.customer", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "customer", + "description": "description for customer table from dbt", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.customer", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1581760640000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "active", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "activebool", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "create_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),active)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),active)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),activebool)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),activebool)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),address_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),address_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),create_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),create_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),first_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_update)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),store_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),store_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_01", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_01", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_01", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1580505371997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "an_array_property": "['alpha', 'beta', 'charlie']", + "model_maturity": "in prod", + "owner": "@charles", + "some_other_property": "test 3", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_02", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_02", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@charles", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_02", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1582319845997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_03", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_03", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_03", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1584998318997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_04", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_04", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_04", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1588287228997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_05", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_05", + "description": "a payment", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_05", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1589460269997, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "source.sample_dbt.pagila.payment_p2020_06", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payment_p2020_06", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.sample_dbt.pagila.payment_p2020_06", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": -62135596800000, + "actor": "urn:li:corpuser:dbt_executor" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "confidenceScore": 1.0 + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD)", @@ -2344,8 +5311,8 @@ }, "assertionUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2426,8 +5393,8 @@ }, "assertionUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2507,8 +5474,8 @@ }, "assertionUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2588,8 +5555,8 @@ }, "assertionUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2678,8 +5645,8 @@ }, "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2752,8 +5719,8 @@ }, "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2833,8 +5800,8 @@ }, "assertionUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2920,8 +5887,8 @@ }, "assertionUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index a46da9707679c..d213cffa78045 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -227,7 +227,7 @@ def set_paths( source_config_modifiers={ "prefer_sql_parser_lineage": True, "skip_sources_in_lineage": True, - "entities_enabled": {"sources": "NO"}, + # "entities_enabled": {"sources": "NO"}, }, ), ], diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 01d7a4809b01b..90ff78b16f652 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -247,7 +247,6 @@ def test_dbt_config_prefer_sql_parser_lineage(): "catalog_path": "dummy_path", "target_platform": "dummy_platform", "skip_sources_in_lineage": True, - "entities_enabled": {"sources": "NO"}, "prefer_sql_parser_lineage": True, } config = DBTCoreConfig.parse_obj(config_dict) From 18ce10448fbff2b861dc64c8c20c1dec5e0c1741 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:14:40 -0500 Subject: [PATCH 3/3] feat(actions): updates to gha workflows (#11150) --- .github/actions/ci-optimization/action.yml | 7 +-- .../docker-custom-build-and-push/action.yml | 31 +++++++---- .github/scripts/docker_helpers.sh | 12 ++++- .github/workflows/build-and-test.yml | 5 ++ .github/workflows/docker-unified.yml | 51 +++++++++---------- .github/workflows/lint-actions.yml | 5 ++ .github/workflows/metadata-ingestion.yml | 5 ++ .github/workflows/metadata-io.yml | 5 ++ .github/workflows/metadata-model.yml | 2 +- .github/workflows/publish-datahub-jars.yml | 3 ++ .github/workflows/spark-smoke-test.yml | 7 ++- .github/workflows/test-results.yml | 5 ++ 12 files changed, 93 insertions(+), 45 deletions(-) diff --git a/.github/actions/ci-optimization/action.yml b/.github/actions/ci-optimization/action.yml index 2f677a0e552c2..ae429c8d8b9fe 100644 --- a/.github/actions/ci-optimization/action.yml +++ b/.github/actions/ci-optimization/action.yml @@ -1,5 +1,5 @@ -name: 'Identify CI Optimizations' -description: 'Determine if code changes are specific to certain modules.' +name: "Identify CI Optimizations" +description: "Determine if code changes are specific to certain modules." outputs: frontend-only: @@ -44,9 +44,10 @@ outputs: runs: using: "composite" steps: - - uses: dorny/paths-filter@v2 + - uses: dorny/paths-filter@v3 id: filter with: + token: "" # Empty token forces it to use raw git commands. filters: | frontend: - "datahub-frontend/**" diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index 1c4a777c14802..763cd29343f5d 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -26,10 +26,13 @@ inputs: build-args: description: "List of build-time variables. Same as docker/build-push-action" required: false - tags: - # e.g. latest,head,sha12345 - description: "List of tags to use for the Docker image" + image_tag: + # e.g. pr12345 OR head OR v0.1.2.3 + description: "Main tag to use for the Docker image" required: true + flavor: + description: 'Image flavor (e.g., slim, full)' + required: false target: description: "Sets the target stage to build" required: false @@ -45,13 +48,17 @@ runs: steps: - name: Docker meta id: docker_meta - uses: crazy-max/ghaction-docker-meta@v1 + uses: docker/metadata-action@v5 with: - # list of Docker images to use as base name for tags images: ${{ inputs.images }} - # add git short SHA as Docker tag - tag-custom: ${{ inputs.tags }} - tag-custom-only: true + flavor: | + latest=false + suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }} + tags: | + type=raw,value=${{ inputs.image_tag }} + type=raw,value=head,enable=${{ github.ref == format('refs/heads/{0}', 'acryl-main') }} + type=ref,event=pr,prefix=pr + type=sha,prefix=,format=short # Code for testing the build when not pushing to Docker Hub. - name: Build and Load image for testing (if not publishing) @@ -74,11 +81,13 @@ runs: if: ${{ inputs.publish != 'true' }} shell: bash run: | + IMAGES=""" + ${{ inputs.images }} + """ TAGS=""" - ${{ steps.docker_meta.outputs.tags }} + ${{ inputs.image_tag }} """ - echo "SINGLE_TAG=$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT - id: single_tag + echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' ' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT - name: Upload image locally for testing (if not publishing) uses: ishworkh/docker-image-artifact-upload@v1 if: ${{ inputs.publish != 'true' }} diff --git a/.github/scripts/docker_helpers.sh b/.github/scripts/docker_helpers.sh index e031a6d2a4d84..421a77ce4df4c 100755 --- a/.github/scripts/docker_helpers.sh +++ b/.github/scripts/docker_helpers.sh @@ -5,14 +5,14 @@ export MAIN_BRANCH="master" export MAIN_BRANCH_TAG="head" function get_short_sha { - echo $(git rev-parse --short "$GITHUB_SHA") + echo $(git rev-parse --short "$GITHUB_SHA"|head -c7) } export SHORT_SHA=$(get_short_sha) echo "SHORT_SHA: $SHORT_SHA" function get_tag { - echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g'),${SHORT_SHA} + echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g') } function get_tag_slim { @@ -38,3 +38,11 @@ function get_unique_tag_slim { function get_unique_tag_full { echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g') } + +function get_platforms_based_on_branch { + if [ "${{ github.event_name }}" == 'push' && "${{ github.ref }}" == "refs/heads/${MAIN_BRANCH}" ]; then + echo "linux/amd64,linux/arm64" + else + echo "linux/amd64" + fi +} diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c93267947b65a..b0666f4a42aac 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -57,6 +57,11 @@ jobs: timeout-minutes: 60 needs: setup steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - uses: szenius/set-timezone@v1.2 with: timezoneLinux: ${{ matrix.timezone }} diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 9487e71e8da3d..c708b562864c5 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -47,7 +47,6 @@ jobs: publish: ${{ steps.publish.outputs.publish }} pr-publish: ${{ steps.pr-publish.outputs.publish }} python_release_version: ${{ steps.tag.outputs.python_release_version }} - short_sha: ${{ steps.tag.outputs.short_sha }} branch_name: ${{ steps.tag.outputs.branch_name }} repository_name: ${{ steps.tag.outputs.repository_name }} frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' }} @@ -157,7 +156,7 @@ jobs: with: images: | ${{ env.DATAHUB_GMS_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -221,7 +220,7 @@ jobs: with: images: | ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -285,7 +284,7 @@ jobs: with: images: | ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -349,7 +348,7 @@ jobs: with: images: | ${{ env.DATAHUB_UPGRADE_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -394,7 +393,7 @@ jobs: name: Build and Push DataHub Frontend Docker Image runs-on: ubuntu-latest needs: setup - if: ${{ needs.setup.outputs.frontend_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.frontend_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true'}} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -415,7 +414,7 @@ jobs: with: images: | ${{ env.DATAHUB_FRONTEND_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -469,7 +468,7 @@ jobs: with: images: | ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -490,7 +489,7 @@ jobs: with: images: | ${{ env.DATAHUB_MYSQL_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -511,7 +510,7 @@ jobs: with: images: | ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -525,7 +524,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: setup - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -536,7 +535,7 @@ jobs: target: base images: | ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -552,7 +551,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: [setup, datahub_ingestion_base_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -574,7 +573,7 @@ jobs: target: slim-install images: | ${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - tags: ${{ needs.setup.outputs.slim_tag }} + image_tag: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} build-args: | @@ -593,7 +592,7 @@ jobs: outputs: tag: ${{ steps.tag.outputs.tag }} needs: [setup, datahub_ingestion_base_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -636,7 +635,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 'true' && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true') }} needs: [setup, datahub_ingestion_base_slim_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -647,7 +646,7 @@ jobs: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - name: Build codegen - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 @@ -661,7 +660,7 @@ jobs: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Slim Image - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -672,7 +671,7 @@ jobs: DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} APP_ENV=slim - tags: ${{ needs.setup.outputs.slim_tag }} + image_tag: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -723,7 +722,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} needs_artifact_download: ${{ needs.setup.outputs.ingestion_change == 'true' && ( needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' ) }} needs: [setup, datahub_ingestion_base_full_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Set up JDK 17 uses: actions/setup-java@v3 @@ -734,7 +733,7 @@ jobs: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 - name: Build codegen - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 @@ -748,7 +747,7 @@ jobs: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} - name: Build and push Full Image - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -758,7 +757,7 @@ jobs: BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} DOCKER_VERSION=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} - tags: ${{ needs.setup.outputs.tag }} + image_tag: ${{ needs.setup.outputs.tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} @@ -776,7 +775,7 @@ jobs: name: "[Monitoring] Scan Datahub Ingestion images for vulnerabilities" runs-on: ubuntu-latest needs: [setup, datahub_ingestion_full_build] - if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' }} + if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: - name: Checkout # adding checkout step just to make trivy upload happy uses: acryldata/sane-checkout-action@v3 @@ -965,7 +964,7 @@ jobs: echo 'datahub-ingestion head-slim images' docker pull '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' if [ '${{ needs.datahub_ingestion_slim_build.outputs.tag || 'head-slim' }}' != 'head-slim' ]; then - docker tag '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' '${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.tag }}' + docker tag '${{ env.DATAHUB_INGESTION_IMAGE }}:head-slim' '${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.setup.outputs.unique_tag }}' fi fi - name: Disk Check @@ -1049,7 +1048,7 @@ jobs: runs-on: ubuntu-latest needs: [setup, smoke_test] steps: - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v4 if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }} with: aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }} diff --git a/.github/workflows/lint-actions.yml b/.github/workflows/lint-actions.yml index 4d83adbeba08a..8a1777522f416 100644 --- a/.github/workflows/lint-actions.yml +++ b/.github/workflows/lint-actions.yml @@ -14,3 +14,8 @@ jobs: - uses: reviewdog/action-actionlint@v1 with: reporter: github-pr-review + permissions: + contents: read + checks: write + pull-requests: write + issues: write diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 51b97552eb150..a27013c4bf488 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -46,6 +46,11 @@ jobs: - python-version: "3.10" fail-fast: false steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - name: Set up JDK 17 uses: actions/setup-java@v3 with: diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 6797c7ad67c0b..332330b4ed898 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -47,6 +47,11 @@ jobs: timeout-minutes: 60 needs: setup steps: + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - uses: acryldata/sane-checkout-action@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 558b7c80f727c..d62c03057db3f 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -49,7 +49,7 @@ jobs: run: ./gradlew :metadata-ingestion:modelDocGen - name: Configure AWS Credentials if: ${{ needs.setup.outputs.publish == 'true' }} - uses: aws-actions/configure-aws-credentials@v3 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY }} diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml index 7137302c73564..aceee756339ad 100644 --- a/.github/workflows/publish-datahub-jars.yml +++ b/.github/workflows/publish-datahub-jars.yml @@ -45,6 +45,9 @@ jobs: echo "tag=$TAG" >> $GITHUB_OUTPUT publish: runs-on: ubuntu-latest + permissions: + id-token: write + contents: read needs: ["check-secret", "setup"] if: ${{ needs.check-secret.outputs.publish-enabled == 'true' }} steps: diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index 8ffc8420ba941..d1618c6528577 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -44,8 +44,11 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Disk Check run: df -h . && docker images - - name: Remove images - run: docker image prune -a -f || true + - name: Free up disk space + run: | + sudo apt-get remove 'dotnet-*' azure-cli || true + sudo rm -rf /usr/local/lib/android/ || true + sudo docker image prune -a -f || true - name: Disk Check run: df -h . && docker images - name: Smoke test diff --git a/.github/workflows/test-results.yml b/.github/workflows/test-results.yml index c94a5fc340f47..a122ef3835f4d 100644 --- a/.github/workflows/test-results.yml +++ b/.github/workflows/test-results.yml @@ -10,6 +10,11 @@ jobs: unit-test-results: name: Unit Test Results runs-on: ubuntu-latest + permissions: + contents: read + actions: read + checks: write + issues: read if: github.event.workflow_run.conclusion != 'skipped' steps: