From e45f548910834dc5f2a61d0cd2168b69ec1172b2 Mon Sep 17 00:00:00 2001 From: skrydal Date: Thu, 19 Dec 2024 16:25:59 +0100 Subject: [PATCH 01/11] feat(ingest/iceberg): Improve iceberg connector (#12163) --- .../ingestion/source/iceberg/iceberg.py | 28 ++- .../source/iceberg/iceberg_common.py | 4 + metadata-ingestion/tests/unit/test_iceberg.py | 168 ++++++++++++++++-- 3 files changed, 189 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index 5931873f54236..76f24bfd63d47 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -10,6 +10,7 @@ NoSuchNamespaceError, NoSuchPropertyException, NoSuchTableError, + ServerError, ) from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit from pyiceberg.table import Table @@ -145,6 +146,13 @@ def _get_datasets(self, catalog: Catalog) -> Iterable[Identifier]: self.report.report_no_listed_namespaces(len(namespaces)) tables_count = 0 for namespace in namespaces: + namespace_repr = ".".join(namespace) + if not self.config.namespace_pattern.allowed(namespace_repr): + LOGGER.info( + f"Namespace {namespace_repr} is not allowed by config pattern, skipping" + ) + self.report.report_dropped(f"{namespace_repr}.*") + continue try: tables = catalog.list_tables(namespace) tables_count += len(tables) @@ -181,6 +189,9 @@ def _process_dataset(dataset_path: Identifier) -> Iterable[MetadataWorkUnit]: if not self.config.table_pattern.allowed(dataset_name): # Dataset name is rejected by pattern, report as dropped. 
self.report.report_dropped(dataset_name) + LOGGER.debug( + f"Skipping table {dataset_name} due to not being allowed by the config pattern" + ) return try: if not hasattr(thread_local, "local_catalog"): @@ -219,6 +230,22 @@ def _process_dataset(dataset_path: Identifier) -> Iterable[MetadataWorkUnit]: LOGGER.warning( f"NoSuchTableError while processing table {dataset_path}, skipping it.", ) + except FileNotFoundError as e: + self.report.report_warning( + "file-not-found", + f"Encountered FileNotFoundError when trying to read manifest file for {dataset_name}. {e}", + ) + LOGGER.warning( + f"FileNotFoundError while processing table {dataset_path}, skipping it." + ) + except ServerError as e: + self.report.report_warning( + "iceberg-rest-server-error", + f"Iceberg Rest Catalog returned 500 status due to an unhandled exception for {dataset_name}. Exception: {e}", + ) + LOGGER.warning( + f"Iceberg Rest Catalog server error (500 status) encountered when processing table {dataset_path}, skipping it." 
+ ) except Exception as e: self.report.report_failure("general", f"Failed to create workunit: {e}") LOGGER.exception( @@ -269,7 +296,6 @@ def _create_iceberg_workunit( ] = table.current_snapshot().manifest_list dataset_properties = DatasetPropertiesClass( name=table.name()[-1], - tags=[], description=table.metadata.properties.get("comment", None), customProperties=custom_properties, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 98ad9e552d35c..4a7f6bf4d60c1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -68,6 +68,10 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin) default=AllowDenyPattern.allow_all(), description="Regex patterns for tables to filter in ingestion.", ) + namespace_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for namespaces to filter in ingestion.", + ) user_ownership_property: Optional[str] = Field( default="owner", description="Iceberg table property to look for a `CorpUser` owner. Can only hold a single user value. 
If property has no value, no owner information will be emitted.", diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py index b8a136586a2bf..3afa26b35dfe9 100644 --- a/metadata-ingestion/tests/unit/test_iceberg.py +++ b/metadata-ingestion/tests/unit/test_iceberg.py @@ -10,6 +10,8 @@ NoSuchIcebergTableError, NoSuchNamespaceError, NoSuchPropertyException, + NoSuchTableError, + ServerError, ) from pyiceberg.io.pyarrow import PyArrowFileIO from pyiceberg.partitioning import PartitionSpec @@ -39,6 +41,7 @@ UUIDType, ) +from datahub.configuration.common import AllowDenyPattern from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.iceberg.iceberg import ( @@ -62,12 +65,12 @@ ) -def with_iceberg_source(processing_threads: int = 1) -> IcebergSource: +def with_iceberg_source(processing_threads: int = 1, **kwargs: Any) -> IcebergSource: catalog = {"test": {"type": "rest"}} return IcebergSource( ctx=PipelineContext(run_id="iceberg-source-test"), config=IcebergSourceConfig( - catalog=catalog, processing_threads=processing_threads + catalog=catalog, processing_threads=processing_threads, **kwargs ), ) @@ -542,11 +545,11 @@ def __init__(self, tables: Dict[str, Dict[str, Callable[[], Table]]]): """ self.tables = tables - def list_namespaces(self) -> Iterable[str]: - return [*self.tables.keys()] + def list_namespaces(self) -> Iterable[Tuple[str]]: + return [*[(key,) for key in self.tables.keys()]] def list_tables(self, namespace: str) -> Iterable[Tuple[str, str]]: - return [(namespace, table) for table in self.tables[namespace].keys()] + return [(namespace[0], table) for table in self.tables[namespace[0]].keys()] def load_table(self, dataset_path: Tuple[str, str]) -> Table: return self.tables[dataset_path[0]][dataset_path[1]]() @@ -554,15 +557,15 @@ def load_table(self, dataset_path: Tuple[str, str]) -> Table: class 
MockCatalogExceptionListingTables(MockCatalog): def list_tables(self, namespace: str) -> Iterable[Tuple[str, str]]: - if namespace == "no_such_namespace": + if namespace == ("no_such_namespace",): raise NoSuchNamespaceError() - if namespace == "generic_exception": + if namespace == ("generic_exception",): raise Exception() return super().list_tables(namespace) class MockCatalogExceptionListingNamespaces(MockCatalog): - def list_namespaces(self) -> Iterable[str]: + def list_namespaces(self) -> Iterable[Tuple[str]]: raise Exception() @@ -814,15 +817,157 @@ def test_proper_run_with_multiple_namespaces() -> None: ) +def test_filtering() -> None: + source = with_iceberg_source( + processing_threads=1, + table_pattern=AllowDenyPattern(deny=[".*abcd.*"]), + namespace_pattern=AllowDenyPattern(allow=["namespace1"]), + ) + mock_catalog = MockCatalog( + { + "namespace1": { + "table_xyz": lambda: Table( + identifier=("namespace1", "table_xyz"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace1/table_xyz", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace1/table_xyz", + io=PyArrowFileIO(), + catalog=None, + ), + "JKLtable": lambda: Table( + identifier=("namespace1", "JKLtable"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace1/JKLtable", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace1/JKLtable", + io=PyArrowFileIO(), + catalog=None, + ), + "table_abcd": lambda: Table( + identifier=("namespace1", "table_abcd"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace1/table_abcd", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace1/table_abcd", + io=PyArrowFileIO(), + catalog=None, + ), + "aaabcd": lambda: Table( + identifier=("namespace1", "aaabcd"), + 
metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace1/aaabcd", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace1/aaabcd", + io=PyArrowFileIO(), + catalog=None, + ), + }, + "namespace2": { + "foo": lambda: Table( + identifier=("namespace2", "foo"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace2/foo", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace2/foo", + io=PyArrowFileIO(), + catalog=None, + ), + "bar": lambda: Table( + identifier=("namespace2", "bar"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace2/bar", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace2/bar", + io=PyArrowFileIO(), + catalog=None, + ), + }, + "namespace3": { + "sales": lambda: Table( + identifier=("namespace3", "sales"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace3/sales", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace3/sales", + io=PyArrowFileIO(), + catalog=None, + ), + "products": lambda: Table( + identifier=("namespace2", "bar"), + metadata=TableMetadataV2( + partition_specs=[PartitionSpec(spec_id=0)], + location="s3://abcdefg/namespace3/products", + last_column_id=0, + schemas=[Schema(schema_id=0)], + ), + metadata_location="s3://abcdefg/namespace3/products", + io=PyArrowFileIO(), + catalog=None, + ), + }, + } + ) + with patch( + "datahub.ingestion.source.iceberg.iceberg.IcebergSourceConfig.get_catalog" + ) as get_catalog: + get_catalog.return_value = mock_catalog + wu: List[MetadataWorkUnit] = [*source.get_workunits_internal()] + assert len(wu) == 2 + urns = [] + for unit in wu: + assert isinstance(unit.metadata, MetadataChangeEvent) + 
assert isinstance(unit.metadata.proposedSnapshot, DatasetSnapshotClass) + urns.append(unit.metadata.proposedSnapshot.urn) + TestCase().assertCountEqual( + urns, + [ + "urn:li:dataset:(urn:li:dataPlatform:iceberg,namespace1.table_xyz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:iceberg,namespace1.JKLtable,PROD)", + ], + ) + assert source.report.tables_scanned == 2 + + def test_handle_expected_exceptions() -> None: source = with_iceberg_source(processing_threads=3) def _raise_no_such_property_exception(): raise NoSuchPropertyException() - def _raise_no_such_table_exception(): + def _raise_no_such_iceberg_table_exception(): raise NoSuchIcebergTableError() + def _raise_file_not_found_error(): + raise FileNotFoundError() + + def _raise_no_such_table_exception(): + raise NoSuchTableError() + + def _raise_server_error(): + raise ServerError() + mock_catalog = MockCatalog( { "namespaceA": { @@ -876,6 +1021,9 @@ def _raise_no_such_table_exception(): ), "table5": _raise_no_such_property_exception, "table6": _raise_no_such_table_exception, + "table7": _raise_file_not_found_error, + "table8": _raise_no_such_iceberg_table_exception, + "table9": _raise_server_error, } } ) @@ -899,7 +1047,7 @@ def _raise_no_such_table_exception(): "urn:li:dataset:(urn:li:dataPlatform:iceberg,namespaceA.table4,PROD)", ], ) - assert source.report.warnings.total_elements == 2 + assert source.report.warnings.total_elements == 5 assert source.report.failures.total_elements == 0 assert source.report.tables_scanned == 4 From 08605a95a78df3f2a47c42a1e595b01f52dcc5e5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 19 Dec 2024 11:02:37 -0500 Subject: [PATCH 02/11] feat(python): split out temp wheel builds (#12157) --- .github/workflows/airflow-plugin.yml | 5 +- .github/workflows/dagster-plugin.yml | 8 +- .github/workflows/gx-plugin.yml | 8 +- .github/workflows/metadata-ingestion.yml | 9 +- .github/workflows/prefect-plugin.yml | 17 +-- .github/workflows/python-build-pages.yml | 64 ++++++++++ 
docs-website/build.gradle | 6 +- docs-website/generateDocsDir.ts | 24 ++-- metadata-ingestion/build.gradle | 4 +- python-build/.gitignore | 3 + python-build/build.gradle | 27 ++++ python-build/build_site.py | 150 +++++++++++++++++++++++ python-build/copy_wheels.py | 27 ++++ settings.gradle | 1 + 14 files changed, 304 insertions(+), 49 deletions(-) create mode 100644 .github/workflows/python-build-pages.yml create mode 100644 python-build/.gitignore create mode 100644 python-build/build.gradle create mode 100644 python-build/build_site.py create mode 100644 python-build/copy_wheels.py diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index eefa02be4f1af..26fcceb8aeab7 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -27,7 +27,6 @@ jobs: airflow-plugin: runs-on: ubuntu-latest env: - SPARK_VERSION: 3.0.3 DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: @@ -69,7 +68,7 @@ jobs: - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && uv pip freeze - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }} with: name: Test Results (Airflow Plugin ${{ matrix.python-version}}) @@ -93,7 +92,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index bee1ec95e7774..d8a9cd7bfd6a3 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -27,7 +27,6 @@ jobs: dagster-plugin: runs-on: ubuntu-latest env: - SPARK_VERSION: 3.0.3 DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: @@ -44,7 +43,8 @@ jobs: with: distribution: "zulu" java-version: 17 - - 
uses: actions/checkout@v4 + - uses: gradle/actions/setup-gradle@v3 + - uses: acryldata/sane-checkout-action@v3 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -56,7 +56,7 @@ jobs: - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }} with: name: Test Results (dagster Plugin ${{ matrix.python-version}}) @@ -79,7 +79,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml index 595438bd6e4a9..2fd814a076485 100644 --- a/.github/workflows/gx-plugin.yml +++ b/.github/workflows/gx-plugin.yml @@ -27,7 +27,6 @@ jobs: gx-plugin: runs-on: ubuntu-latest env: - SPARK_VERSION: 3.0.3 DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: @@ -48,7 +47,8 @@ jobs: with: distribution: "zulu" java-version: 17 - - uses: actions/checkout@v4 + - uses: gradle/actions/setup-gradle@v3 + - uses: acryldata/sane-checkout-action@v3 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -60,7 +60,7 @@ jobs: - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && uv pip freeze - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }} with: name: Test Results (GX Plugin ${{ matrix.python-version}}) @@ -83,7 +83,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ 
github.event_path }} diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 49def2a863c56..ad00c6d1551d1 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -28,7 +28,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 40 env: - SPARK_VERSION: 3.3.2 DATAHUB_TELEMETRY_ENABLED: false # TODO: Enable this once the test is fixed. # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }} @@ -84,9 +83,9 @@ jobs: df -hl docker image ls docker system df - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: Test Results (metadata ingestion ${{ matrix.python-version }}) + name: Test Results (metadata ingestion ${{ matrix.python-version }} ${{ matrix.command }}) path: | **/build/reports/tests/test/** **/build/test-results/test/** @@ -100,14 +99,14 @@ jobs: directory: ./build/coverage-reports/ fail_ci_if_error: false flags: pytest-${{ matrix.command }} - name: pytest-${{ matrix.command }} + name: pytest-${{ matrix.python-version }}-${{ matrix.command }} verbose: true event-file: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index 3c75e8fe9a62f..e4a70426f3a61 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -27,25 +27,20 @@ jobs: prefect-plugin: runs-on: ubuntu-latest env: - SPARK_VERSION: 3.0.3 DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: python-version: ["3.8", "3.9", "3.10"] - include: - - python-version: "3.8" - - python-version: "3.9" - - python-version: "3.10" fail-fast: false steps: - name: Set up JDK 17 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: 
actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: "pip" @@ -56,7 +51,7 @@ jobs: - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ always() && matrix.python-version == '3.10'}} with: name: Test Results (Prefect Plugin ${{ matrix.python-version}}) @@ -72,7 +67,7 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} directory: ./build/coverage-reports/ fail_ci_if_error: false - flags: prefect,prefect-${{ matrix.extra_pip_extras }} + flags: prefect,prefect-${{ matrix.python-version }} name: pytest-prefect-${{ matrix.python-version }} verbose: true @@ -80,7 +75,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/python-build-pages.yml b/.github/workflows/python-build-pages.yml new file mode 100644 index 0000000000000..8971722c374fb --- /dev/null +++ b/.github/workflows/python-build-pages.yml @@ -0,0 +1,64 @@ +name: Python Build +on: + push: + branches: + - master + paths: + - ".github/workflows/python-build-pages.yml" + - "metadata-ingestion/**" + - "metadata-ingestion-modules/**" + - "metadata-models/**" + pull_request: + branches: + - "**" + paths: + - ".github/workflows/python-build-pages.yml" + - "metadata-ingestion/**" + - "metadata-ingestion-modules/**" + - "metadata-models/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + deploy-pages: + runs-on: ubuntu-latest + if: ${{ vars.CLOUDFLARE_WHEELS_PROJECT_NAME != '' }} + + name: Python Wheels + permissions: + contents: read + pull-requests: read + deployments: write + 
steps: + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + distribution: "zulu" + java-version: 17 + - uses: gradle/actions/setup-gradle@v3 + - uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + - uses: actions/cache@v4 + with: + path: | + ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }} + - name: Build Python wheel site + run: | + ./gradlew :python-build:buildSite + env: + GITHUB_TOKEN: ${{ github.token }} + - name: Publish + uses: cloudflare/pages-action@v1 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + projectName: ${{ vars.CLOUDFLARE_WHEELS_PROJECT_NAME }} + workingDirectory: python-build + directory: site + gitHubToken: ${{ github.token }} diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 1860b4a49ae23..797863d2019fb 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -83,11 +83,7 @@ task yarnInstall(type: YarnTask) { task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLSchema, generateJsonSchema, ':metadata-ingestion:modelDocGen', ':metadata-ingestion:docGen', - ':metadata-ingestion:buildWheel', - ':metadata-ingestion-modules:airflow-plugin:buildWheel', - ':metadata-ingestion-modules:dagster-plugin:buildWheel', - ':metadata-ingestion-modules:prefect-plugin:buildWheel', - ':metadata-ingestion-modules:gx-plugin:buildWheel', + ':python-build:buildWheels', ]) { inputs.files(projectMdFiles) outputs.cacheIf { true } diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index 0f7e347da64eb..ad82a85f9e567 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -573,26 +573,20 @@ function write_markdown_file( function copy_python_wheels(): void { // Copy the built wheel files to the static directory. 
- const wheel_dirs = [ - "../metadata-ingestion/dist", - "../metadata-ingestion-modules/airflow-plugin/dist", - "../metadata-ingestion-modules/dagster-plugin/dist", - "../metadata-ingestion-modules/prefect-plugin/dist", - "../metadata-ingestion-modules/gx-plugin/dist", - ]; + // Everything is copied to the python-build directory first, so + // we just need to copy from there. + const wheel_dir = "../python-build/wheels"; const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels"); fs.mkdirSync(wheel_output_directory, { recursive: true }); - for (const wheel_dir of wheel_dirs) { - const wheel_files = fs.readdirSync(wheel_dir); - for (const wheel_file of wheel_files) { - const src = path.join(wheel_dir, wheel_file); - const dest = path.join(wheel_output_directory, wheel_file); + const wheel_files = fs.readdirSync(wheel_dir); + for (const wheel_file of wheel_files) { + const src = path.join(wheel_dir, wheel_file); + const dest = path.join(wheel_output_directory, wheel_file); - // console.log(`Copying artifact ${src} to ${dest}...`); - fs.copyFileSync(src, dest); - } + // console.log(`Copying artifact ${src} to ${dest}...`); + fs.copyFileSync(src, dest); } } diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 2c5d8e6c9646a..fc1409fbed74e 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -23,8 +23,8 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { inputs.file file('setup.py') outputs.file(sentinel_file) commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + + "${python_executable} -m venv ${venv_name} && set -x && " + + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } diff --git a/python-build/.gitignore b/python-build/.gitignore new file mode 100644 index 0000000000000..d2de6dec25809 --- /dev/null +++ 
b/python-build/.gitignore @@ -0,0 +1,3 @@ + +/wheels +/site diff --git a/python-build/build.gradle b/python-build/build.gradle new file mode 100644 index 0000000000000..e90bffd46828c --- /dev/null +++ b/python-build/build.gradle @@ -0,0 +1,27 @@ +plugins { + id 'base' +} + +ext { + python_executable = 'python3' +} + +task checkPythonVersion(type: Exec) { + commandLine python_executable, '-c', + 'import sys; sys.version_info >= (3, 8), f"Python version {sys.version_info} is too old"' +} + +task buildWheels(type: Exec, dependsOn: [ + checkPythonVersion, + ':metadata-ingestion:buildWheel', + ':metadata-ingestion-modules:airflow-plugin:buildWheel', + ':metadata-ingestion-modules:dagster-plugin:buildWheel', + ':metadata-ingestion-modules:prefect-plugin:buildWheel', + ':metadata-ingestion-modules:gx-plugin:buildWheel', +]) { + commandLine python_executable, "copy_wheels.py" +} + +task buildSite(type: Exec, dependsOn: [buildWheels]) { + commandLine python_executable, "build_site.py" +} diff --git a/python-build/build_site.py b/python-build/build_site.py new file mode 100644 index 0000000000000..73941eca9968c --- /dev/null +++ b/python-build/build_site.py @@ -0,0 +1,150 @@ +import contextlib +import json +import os +import pathlib +import shutil +import subprocess +from datetime import datetime, timezone + +PYTHON_BUILD_DIR = pathlib.Path(__file__).parent +WHEEL_DIR = PYTHON_BUILD_DIR / "wheels" +SITE_OUTPUT_DIR = PYTHON_BUILD_DIR / "site" + +shutil.rmtree(SITE_OUTPUT_DIR, ignore_errors=True) +SITE_OUTPUT_DIR.mkdir(parents=True) + +SITE_ARTIFACT_WHEEL_DIR = SITE_OUTPUT_DIR / "artifacts" / "wheels" +SITE_ARTIFACT_WHEEL_DIR.mkdir(parents=True) +for wheel_file in WHEEL_DIR.glob("*"): + shutil.copy(wheel_file, SITE_ARTIFACT_WHEEL_DIR) + + +def package_name(wheel_file: pathlib.Path) -> str: + return wheel_file.name.split("-")[0].replace("_", "-") + + +# Get some extra context about the build +ts = datetime.now(timezone.utc).isoformat() +context_info: dict = { + "timestamp": ts, 
+} + +# Get branch info. +with contextlib.suppress(Exception): + if branch_info := os.getenv("GITHUB_HEAD_REF"): + pass + else: + branch_info = subprocess.check_output( + ["git", "branch", "--show-current"], text=True + ) + context_info["branch"] = branch_info.strip() + +# Get commit info. +with contextlib.suppress(Exception): + commit_info = subprocess.check_output( + ["git", "log", "-1", "--pretty=%H%n%B"], text=True + ) + commit_hash, commit_msg = commit_info.strip().split("\n", 1) + context_info["commit"] = { + "hash": commit_hash, + "message": commit_msg.strip(), + } + +# Get PR info. +with contextlib.suppress(Exception): + pr_info = "unknown" + if github_ref := os.getenv("GITHUB_REF"): + # e.g. GITHUB_REF=refs/pull/12157/merge + parts = github_ref.split("/") + if parts[1] == "pull": + pull_number = parts[2] + pr_info = json.loads( + subprocess.check_output( + ["gh", "pr", "view", pull_number, "--json", "title,number,url"], + text=True, + ) + ) + else: + # The `gh` CLI might be able to figure it out. + pr_info = json.loads( + subprocess.check_output( + ["gh", "pr", "view", "--json", "title,number,url"], text=True + ) + ) + context_info["pr"] = pr_info + + +newline = "\n" +(SITE_OUTPUT_DIR / "index.html").write_text( + f""" + + + DataHub Python Builds + + + + + + + + + + + +
+

DataHub Python Builds

+

+ These prebuilt wheel files can be used to install our Python packages as of a specific commit. +

+ +

Build context

+

+ Built at {ts}. +

+
{json.dumps(context_info, indent=2)}
+ +

Usage

+

+ Current base URL: unknown +

+ + + + + + + + + + + { + newline.join( + f''' + + + + + + ''' + for wheel_file in sorted(WHEEL_DIR.glob("*.whl")) + ) + } + +
PackageSizeInstall command
{package_name(wheel_file)}{wheel_file.stat().st_size / 1024 / 1024:.3f} MBuv pip install '{package_name(wheel_file)} @ <base-url>/artifacts/wheels/{wheel_file.name}'
+
+ + + +""" +) + +print("DataHub Python wheel site built in", SITE_OUTPUT_DIR) diff --git a/python-build/copy_wheels.py b/python-build/copy_wheels.py new file mode 100644 index 0000000000000..b66662cbfe991 --- /dev/null +++ b/python-build/copy_wheels.py @@ -0,0 +1,27 @@ +import pathlib +import shutil + +PYTHON_BUILD_DIR = pathlib.Path(__file__).parent +ROOT_DIR = PYTHON_BUILD_DIR.parent +WHEEL_OUTPUT_DIR = PYTHON_BUILD_DIR / "wheels" + +# These should line up with the build.gradle file. +wheel_dirs = [ + ROOT_DIR / "metadata-ingestion/dist", + ROOT_DIR / "metadata-ingestion-modules/airflow-plugin/dist", + ROOT_DIR / "metadata-ingestion-modules/dagster-plugin/dist", + ROOT_DIR / "metadata-ingestion-modules/prefect-plugin/dist", + ROOT_DIR / "metadata-ingestion-modules/gx-plugin/dist", +] + +# Delete and recreate the output directory. +if WHEEL_OUTPUT_DIR.exists(): + shutil.rmtree(WHEEL_OUTPUT_DIR) +WHEEL_OUTPUT_DIR.mkdir(parents=True) + +# Copy things over. +for wheel_dir in wheel_dirs: + for wheel_file in wheel_dir.glob("*"): + shutil.copy(wheel_file, WHEEL_OUTPUT_DIR) + +print("Copied wheels to", WHEEL_OUTPUT_DIR) diff --git a/settings.gradle b/settings.gradle index 8756df31c1ac6..b0c2c707d566c 100644 --- a/settings.gradle +++ b/settings.gradle @@ -64,6 +64,7 @@ include 'metadata-ingestion-modules:airflow-plugin' include 'metadata-ingestion-modules:gx-plugin' include 'metadata-ingestion-modules:dagster-plugin' include 'metadata-ingestion-modules:prefect-plugin' +include 'python-build' include 'smoke-test' include 'metadata-auth:auth-api' include 'metadata-service:schema-registry-api' From 89acda66d0d56d01a2645d9c8cced7c593b65e99 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 19 Dec 2024 10:18:30 -0600 Subject: [PATCH 03/11] docs(release): v0.3.7.7 (#12091) --- docs/managed-datahub/release-notes/v_0_3_7.md | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git 
a/docs/managed-datahub/release-notes/v_0_3_7.md b/docs/managed-datahub/release-notes/v_0_3_7.md index be3a2d97514ef..75f5ac21224c2 100644 --- a/docs/managed-datahub/release-notes/v_0_3_7.md +++ b/docs/managed-datahub/release-notes/v_0_3_7.md @@ -13,12 +13,43 @@ If you are using an older CLI/SDK version, then please upgrade it. This applies ## Known Issues +### v0.3.7.7 + * Postgres regression, non-functional when using postgres + ### v0.3.7.3 * Search page fails to render when filters are applied with a query which returns zero results. ## Release Changelog --- +### v0.3.7.8 + +- [Postgres] Fix regression from MySQL fix in v0.3.7.7 + +### v0.3.7.7 + +- [UI] Fix bug showing upstream lineage dbt source leaves +- [UI] Show column-level lineage through transformational home node +- [UI] Browse nodes titles expand to full width of panel +- [UI] Data product preview cards display correctly +- [UI] Fix elasticsearch usage sort field names +- [UI] Add structured property display settings feature +- [Executor] Fix false errors on cli ingestions +- [Search] Schema field boost reduced +- [Search] Search usage ranking null_fill fix +- [Search] Single term with underscores by default no longer considered quoted +- [Metadata Tests] Metadata Test shutdown actions flush +- [Metadata Tests] Add deduplicate logic for MCP batches +- [Metadata Tests] Prevent mutation of systemMetadata in patch batches +- [MAE Consumer] Fix graph edge on container delete exception +- [Notifications] Filter out system ingestion source notifications +- [MySQL] Fix index gap lock deadlock +- [API] DataJobInputOutput finegrained lineage fix + +### v0.3.7.6 + +- [UI] fix(automations): white screen automations with dbt sync + ### v0.3.7.5 - [GMS] Fix upstream lineage patching when path contained encoded slash From 9031b49b2345f79db5504f80432af1cd8a77a5e5 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 19 Dec 2024 09:07:59 -0800 Subject: [PATCH 04/11] fix(docs): Add improvements in examples for PATCH 
documentation (#12165) Co-authored-by: John Joyce Co-authored-by: John Joyce --- docs/advanced/patch.md | 110 +++++++++++++----- docs/api/tutorials/custom-properties.md | 4 +- .../dataset_add_custom_properties_patch.py | 19 +++ .../dataset_add_glossary_term_patch.py | 22 ++++ .../library/dataset_add_owner_patch.py | 24 ++++ .../library/dataset_add_properties.py | 44 ------- ...aset_add_remove_custom_properties_patch.py | 19 +++ .../library/dataset_add_remove_properties.py | 46 -------- .../dataset_add_structured_properties.py | 24 ---- ...dataset_add_structured_properties_patch.py | 23 ++++ .../examples/library/dataset_add_tag_patch.py | 22 ++++ .../dataset_add_upstream_lineage_patch.py | 62 ++++++++++ .../dataset_field_add_glossary_term_patch.py | 26 +++++ .../library/dataset_field_add_tag_patch.py | 24 ++++ 14 files changed, 321 insertions(+), 148 deletions(-) create mode 100644 metadata-ingestion/examples/library/dataset_add_custom_properties_patch.py create mode 100644 metadata-ingestion/examples/library/dataset_add_glossary_term_patch.py create mode 100644 metadata-ingestion/examples/library/dataset_add_owner_patch.py delete mode 100644 metadata-ingestion/examples/library/dataset_add_properties.py create mode 100644 metadata-ingestion/examples/library/dataset_add_remove_custom_properties_patch.py delete mode 100644 metadata-ingestion/examples/library/dataset_add_remove_properties.py delete mode 100644 metadata-ingestion/examples/library/dataset_add_structured_properties.py create mode 100644 metadata-ingestion/examples/library/dataset_add_structured_properties_patch.py create mode 100644 metadata-ingestion/examples/library/dataset_add_tag_patch.py create mode 100644 metadata-ingestion/examples/library/dataset_add_upstream_lineage_patch.py create mode 100644 metadata-ingestion/examples/library/dataset_field_add_glossary_term_patch.py create mode 100644 metadata-ingestion/examples/library/dataset_field_add_tag_patch.py diff --git a/docs/advanced/patch.md 
b/docs/advanced/patch.md index 601d055659313..24e8c68a9168d 100644 --- a/docs/advanced/patch.md +++ b/docs/advanced/patch.md @@ -1,69 +1,120 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# But First, Semantics: Upsert versus Patch +# Emitting Patch Updates to DataHub ## Why Would You Use Patch -By default, most of the SDK tutorials and API-s involve applying full upserts at the aspect level. This means that typically, when you want to change one field within an aspect without modifying others, you need to do a read-modify-write to not overwrite existing fields. -To support these scenarios, DataHub supports PATCH based operations so that targeted changes to single fields or values within arrays of fields are possible without impacting other existing metadata. +By default, most of the SDK tutorials and APIs involve applying full upserts at the aspect level, i.e. replacing the aspect entirely. +This means that when you want to change even a single field within an aspect without modifying others, you need to do a read-modify-write to avoid overwriting existing fields. +To support these scenarios, DataHub supports `PATCH` operations, which perform targeted changes to individual fields or values within arrays of fields without impacting other existing metadata. :::note -Currently, PATCH support is only available for a selected set of aspects, so before pinning your hopes on using PATCH as a way to make modifications to aspect values, confirm whether your aspect supports PATCH semantics. The complete list of Aspects that are supported are maintained [here](https://github.com/datahub-project/datahub/blob/9588440549f3d99965085e97b214a7dabc181ed2/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/AspectTemplateEngine.java#L24). In the near future, we do have plans to automatically support PATCH semantics for aspects by default. 
+Currently, PATCH support is only available for a selected set of aspects, so before pinning your hopes on using PATCH as a way to make modifications to aspect values, confirm whether your aspect supports PATCH semantics. The complete list of Aspects that are supported is maintained [here](https://github.com/datahub-project/datahub/blob/9588440549f3d99965085e97b214a7dabc181ed2/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/AspectTemplateEngine.java#L24). ::: -## How To Use Patch +## How To Use Patches -Examples for using Patch are sprinkled throughout the API guides. Here's how to find the appropriate classes for the language for your choice. - - + -The Java Patch builders are aspect-oriented and located in the [datahub-client](https://github.com/datahub-project/datahub/tree/master/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch) module under the `datahub.client.patch` namespace. +The Python Patch builders are entity-oriented and located in the [metadata-ingestion](https://github.com/datahub-project/datahub/tree/9588440549f3d99965085e97b214a7dabc181ed2/metadata-ingestion/src/datahub/specific) module under the `datahub.specific` namespace. 
+Patch builder helper classes exist for -Here are a few illustrative examples using the Java Patch builders: +- [Datasets](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/specific/dataset.py) +- [Charts](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/specific/chart.py) +- [Dashboards](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/specific/dashboard.py) +- [Data Jobs (Tasks)](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/specific/datajob.py) +- [Data Products](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/specific/dataproduct.py) +And we are gladly accepting contributions for Containers, Data Flows (Pipelines), Tags, Glossary Terms, Domains, and ML Models. -### Add Custom Properties +### Add & Remove Owners for Dataset -```java -{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetCustomPropertiesAdd.java show_path_as_comment }} +To add & remove specific owners for a dataset: + +```python +{{ inline /metadata-ingestion/examples/library/dataset_add_owner_patch.py show_path_as_comment }} ``` -### Add and Remove Custom Properties +### Add & Remove Tags for Dataset -```java -{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetCustomPropertiesAddRemove.java show_path_as_comment }} +To add & remove specific tags for a dataset: + +```python +{{ inline /metadata-ingestion/examples/library/dataset_add_tag_patch.py show_path_as_comment }} ``` -### Add Data Job Lineage +And for a specific schema field within the Dataset: -```java -{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DataJobLineageAdd.java show_path_as_comment }} +```python +{{ inline /metadata-ingestion/examples/library/dataset_field_add_tag_patch.py show_path_as_comment }} ``` - - +### Add & Remove 
Glossary Terms for Dataset + +To add & remove specific glossary terms for a dataset: + +```python +{{ inline /metadata-ingestion/examples/library/dataset_add_glossary_term_patch.py show_path_as_comment }} +``` + +And for a specific schema field within the Dataset: + +```python +{{ inline /metadata-ingestion/examples/library/dataset_field_add_glossary_term_patch.py show_path_as_comment }} +``` + +### Add & Remove Structured Properties for Dataset -The Python Patch builders are entity-oriented and located in the [metadata-ingestion](https://github.com/datahub-project/datahub/tree/9588440549f3d99965085e97b214a7dabc181ed2/metadata-ingestion/src/datahub/specific) module and located in the `datahub.specific` module. +To add & remove structured properties for a dataset: -Here are a few illustrative examples using the Python Patch builders: +```python +{{ inline /metadata-ingestion/examples/library/dataset_add_structured_properties_patch.py show_path_as_comment }} +``` -### Add Properties to Dataset +### Add & Remove Upstream Lineage for Dataset + +To add & remove a lineage edge connecting a dataset to its upstream or input at both the dataset and schema field level: ```python -{{ inline /metadata-ingestion/examples/library/dataset_add_properties.py show_path_as_comment }} +{{ inline /metadata-ingestion/examples/library/dataset_add_upstream_lineage_patch.py show_path_as_comment }} +``` + +### Add & Remove Read-Only Custom Properties for Dataset + +To add & remove specific custom properties for a dataset: + +```python +{{ inline /metadata-ingestion/examples/library/dataset_add_remove_custom_properties_patch.py show_path_as_comment }} +``` + + + + +The Java Patch builders are aspect-oriented and located in the [datahub-client](https://github.com/datahub-project/datahub/tree/master/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch) module under the `datahub.client.patch` namespace. 
+ +### Add & Remove Read-Only Custom Properties + +```java +{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetCustomPropertiesAddRemove.java show_path_as_comment }} +``` + +### Add Data Job Lineage + +```java +{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DataJobLineageAdd.java show_path_as_comment }} ``` -## How Patch works +## Advanced: How Patch works To understand how patching works, it's important to understand a bit about our [models](../what/aspect.md). Entities are comprised of Aspects which can be reasoned about as JSON representations of the object models. To be able to patch these we utilize [JsonPatch](https://jsonpatch.com/). The components of a JSON Patch are the path, operation, and value. @@ -73,9 +124,6 @@ which can be reasoned about as JSON representations of the object models. To be The JSON path refers to a value within the schema. This can be a single field or can be an entire object reference depending on what the path is. For our patches we are primarily targeting single fields or even single array elements within a field. To be able to target array elements by id, we go through a translation process of the schema to transform arrays into maps. This allows a path to reference a particular array element by key rather than by index, for example a specific tag urn being added to a dataset. -This is important to note that for some fields in our schema that are arrays which do not necessarily restrict uniqueness, this puts a uniqueness constraint on the key. -The key for objects stored in arrays is determined manually by examining the schema and a long term goal is to make these keys annotation driven to reduce the amount of code needed to support -additional aspects to be patched. There is a generic patch endpoint, but it requires any array field keys to be specified at request time, putting a lot of burden on the API user. 
#### Examples @@ -87,8 +135,7 @@ Breakdown: * `/upstreams` -> References the upstreams field of the UpstreamLineage aspect, this is an array of Upstream objects where the key is the Urn * `/urn:...` -> The dataset to be targeted by the operation - -A patch path for targeting a fine grained lineage upstream: +A patch path for targeting a fine-grained lineage upstream: `/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),foo)/urn:li:query:queryId/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD),bar)` @@ -118,7 +165,6 @@ using adds, but generally the most useful use case for patch is to add elements Remove operations require the path specified to be present, or an error will be thrown, otherwise they operate as one would expect. The specified path will be removed from the aspect. - ### Value Value is the actual information that will be stored at a path. If the path references an object then this will include the JSON key value pairs for that object. 
diff --git a/docs/api/tutorials/custom-properties.md b/docs/api/tutorials/custom-properties.md index fe0d7e62dcde8..86b1b2c0c54da 100644 --- a/docs/api/tutorials/custom-properties.md +++ b/docs/api/tutorials/custom-properties.md @@ -74,7 +74,7 @@ The following code adds custom properties `cluster_name` and `retention_time` to ```python -{{ inline /metadata-ingestion/examples/library/dataset_add_properties.py show_path_as_comment }} +{{ inline /metadata-ingestion/examples/library/dataset_add_custom_properties_patch.py show_path_as_comment }} ``` @@ -128,7 +128,7 @@ The following code shows you how can add and remove custom properties in the sam ```python -{{ inline /metadata-ingestion/examples/library/dataset_add_remove_properties.py show_path_as_comment }} +{{ inline /metadata-ingestion/examples/library/dataset_add_remove_custom_properties_patch.py show_path_as_comment }} ``` diff --git a/metadata-ingestion/examples/library/dataset_add_custom_properties_patch.py b/metadata-ingestion/examples/library/dataset_add_custom_properties_patch.py new file mode 100644 index 0000000000000..7231461fea322 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_custom_properties_patch.py @@ -0,0 +1,19 @@ +from datahub.emitter.mce_builder import make_dataset_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD") + +# Create Dataset Patch to Add Custom Properties +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.add_custom_property("cluster_name", "datahubproject.acryl.io") +patch_builder.add_custom_property("retention_time", "2 years") +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + 
datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_add_glossary_term_patch.py b/metadata-ingestion/examples/library/dataset_add_glossary_term_patch.py new file mode 100644 index 0000000000000..d0b9a866fde61 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_glossary_term_patch.py @@ -0,0 +1,22 @@ +from datahub.emitter.mce_builder import make_dataset_urn, make_term_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.metadata.schema_classes import GlossaryTermAssociationClass +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn( + platform="snowflake", name="fct_users_created", env="PROD" +) + +# Create Dataset Patch to Add + Remove Glossary Terms for the dataset +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.add_term(GlossaryTermAssociationClass(make_term_urn("term-to-add-id"))) +patch_builder.remove_term(make_term_urn("term-to-remove-id")) +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_add_owner_patch.py b/metadata-ingestion/examples/library/dataset_add_owner_patch.py new file mode 100644 index 0000000000000..8d3130c09c4bb --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_owner_patch.py @@ -0,0 +1,24 @@ +from datahub.emitter.mce_builder import make_dataset_urn, make_group_urn, make_user_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN 
+dataset_urn = make_dataset_urn( + platform="snowflake", name="fct_users_created", env="PROD" +) + +# Create Dataset Patch to Add + Remove Owners +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.add_owner( + OwnerClass(make_user_urn("user-to-add-id"), OwnershipTypeClass.TECHNICAL_OWNER) +) +patch_builder.remove_owner(make_group_urn("group-to-remove-id")) +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_add_properties.py b/metadata-ingestion/examples/library/dataset_add_properties.py deleted file mode 100644 index b72aac5b82800..0000000000000 --- a/metadata-ingestion/examples/library/dataset_add_properties.py +++ /dev/null @@ -1,44 +0,0 @@ -import logging -from typing import Union - -from datahub.configuration.kafka import KafkaProducerConnectionConfig -from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig -from datahub.emitter.mce_builder import make_dataset_urn -from datahub.emitter.rest_emitter import DataHubRestEmitter -from datahub.specific.dataset import DatasetPatchBuilder - -log = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - - -# Get an emitter, either REST or Kafka, this example shows you both -def get_emitter() -> Union[DataHubRestEmitter, DatahubKafkaEmitter]: - USE_REST_EMITTER = True - if USE_REST_EMITTER: - gms_endpoint = "http://localhost:8080" - return DataHubRestEmitter(gms_server=gms_endpoint) - else: - kafka_server = "localhost:9092" - schema_registry_url = "http://localhost:8081" - return DatahubKafkaEmitter( - config=KafkaEmitterConfig( - connection=KafkaProducerConnectionConfig( - bootstrap=kafka_server, schema_registry_url=schema_registry_url - ) - ) - ) - - -dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD") - -with get_emitter() as emitter: - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - 
.add_custom_property("cluster_name", "datahubproject.acryl.io") - .add_custom_property("retention_time", "2 years") - .build() - ): - emitter.emit(patch_mcp) - - -log.info(f"Added cluster_name, retention_time properties to dataset {dataset_urn}") diff --git a/metadata-ingestion/examples/library/dataset_add_remove_custom_properties_patch.py b/metadata-ingestion/examples/library/dataset_add_remove_custom_properties_patch.py new file mode 100644 index 0000000000000..c1db9c91d13ec --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_remove_custom_properties_patch.py @@ -0,0 +1,19 @@ +from datahub.emitter.mce_builder import make_dataset_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD") + +# Create Dataset Patch to Add + Remove Custom Properties +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.add_custom_property("cluster_name", "datahubproject.acryl.io") +patch_builder.remove_custom_property("retention_time") +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_add_remove_properties.py b/metadata-ingestion/examples/library/dataset_add_remove_properties.py deleted file mode 100644 index 7109c0264f971..0000000000000 --- a/metadata-ingestion/examples/library/dataset_add_remove_properties.py +++ /dev/null @@ -1,46 +0,0 @@ -import logging -from typing import Union - -from datahub.configuration.kafka import KafkaProducerConnectionConfig -from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig -from datahub.emitter.mce_builder import make_dataset_urn -from datahub.emitter.rest_emitter import 
DataHubRestEmitter -from datahub.specific.dataset import DatasetPatchBuilder - -log = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - - -# Get an emitter, either REST or Kafka, this example shows you both -def get_emitter() -> Union[DataHubRestEmitter, DatahubKafkaEmitter]: - USE_REST_EMITTER = True - if USE_REST_EMITTER: - gms_endpoint = "http://localhost:8080" - return DataHubRestEmitter(gms_server=gms_endpoint) - else: - kafka_server = "localhost:9092" - schema_registry_url = "http://localhost:8081" - return DatahubKafkaEmitter( - config=KafkaEmitterConfig( - connection=KafkaProducerConnectionConfig( - bootstrap=kafka_server, schema_registry_url=schema_registry_url - ) - ) - ) - - -dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD") - -with get_emitter() as emitter: - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .add_custom_property("cluster_name", "datahubproject.acryl.io") - .remove_custom_property("retention_time") - .build() - ): - emitter.emit(patch_mcp) - - -log.info( - f"Added cluster_name property, removed retention_time property from dataset {dataset_urn}" -) diff --git a/metadata-ingestion/examples/library/dataset_add_structured_properties.py b/metadata-ingestion/examples/library/dataset_add_structured_properties.py deleted file mode 100644 index fc2c379340592..0000000000000 --- a/metadata-ingestion/examples/library/dataset_add_structured_properties.py +++ /dev/null @@ -1,24 +0,0 @@ -import logging - -from datahub.emitter.mce_builder import make_dataset_urn -from datahub.emitter.rest_emitter import DataHubRestEmitter -from datahub.specific.dataset import DatasetPatchBuilder - -log = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - -# Create rest emitter -rest_emitter = DataHubRestEmitter(gms_server="http://localhost:8080") - -dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD") - - -for patch_mcp in ( - 
DatasetPatchBuilder(dataset_urn) - .add_structured_property("io.acryl.dataManagement.replicationSLA", 12) - .build() -): - rest_emitter.emit(patch_mcp) - - -log.info(f"Added cluster_name, retention_time properties to dataset {dataset_urn}") diff --git a/metadata-ingestion/examples/library/dataset_add_structured_properties_patch.py b/metadata-ingestion/examples/library/dataset_add_structured_properties_patch.py new file mode 100644 index 0000000000000..ef72ed58a4b82 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_structured_properties_patch.py @@ -0,0 +1,23 @@ +from datahub.emitter.mce_builder import make_dataset_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD") + +# Create Dataset Patch to Add and Remove Structured Properties +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.add_structured_property( + "urn:li:structuredProperty:retentionTimeInDays", 12 +) +patch_builder.remove_structured_property( + "urn:li:structuredProperty:customClassification" +) +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_add_tag_patch.py b/metadata-ingestion/examples/library/dataset_add_tag_patch.py new file mode 100644 index 0000000000000..0bc644d6865f6 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_tag_patch.py @@ -0,0 +1,22 @@ +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.metadata.schema_classes import TagAssociationClass +from datahub.specific.dataset import DatasetPatchBuilder + 
+# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn( + platform="snowflake", name="fct_users_created", env="PROD" +) + +# Create Dataset Patch +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.add_tag(TagAssociationClass(make_tag_urn("tag-to-add-id"))) +patch_builder.remove_tag("urn:li:tag:tag-to-remove-id") +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_add_upstream_lineage_patch.py b/metadata-ingestion/examples/library/dataset_add_upstream_lineage_patch.py new file mode 100644 index 0000000000000..0b4e5e39bf627 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_upstream_lineage_patch.py @@ -0,0 +1,62 @@ +from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.metadata.schema_classes import ( + DatasetLineageTypeClass, + FineGrainedLineageClass, + FineGrainedLineageUpstreamTypeClass, + UpstreamClass, +) +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn( + platform="snowflake", name="fct_users_created", env="PROD" +) +upstream_to_remove_urn = make_dataset_urn( + platform="s3", name="fct_users_old", env="PROD" +) +upstream_to_add_urn = make_dataset_urn(platform="s3", name="fct_users_new", env="PROD") + +# Create Dataset Patch to Add & Remove Upstream Lineage Edges +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.remove_upstream_lineage(upstream_to_remove_urn) +patch_builder.add_upstream_lineage( + UpstreamClass(upstream_to_add_urn, DatasetLineageTypeClass.TRANSFORMED) +) + +# ...And also include 
schema field lineage +upstream_field_to_add_urn = make_schema_field_urn(upstream_to_add_urn, "profile_id") +downstream_field_to_add_urn = make_schema_field_urn(dataset_urn, "profile_id") + +patch_builder.add_fine_grained_upstream_lineage( + FineGrainedLineageClass( + FineGrainedLineageUpstreamTypeClass.FIELD_SET, + FineGrainedLineageUpstreamTypeClass.FIELD_SET, + [upstream_field_to_add_urn], + [downstream_field_to_add_urn], + ) +) + +upstream_field_to_remove_urn = make_schema_field_urn( + upstream_to_remove_urn, "profile_id" +) +downstream_field_to_remove_urn = make_schema_field_urn(dataset_urn, "profile_id") + +patch_builder.remove_fine_grained_upstream_lineage( + FineGrainedLineageClass( + FineGrainedLineageUpstreamTypeClass.FIELD_SET, + FineGrainedLineageUpstreamTypeClass.FIELD_SET, + [upstream_field_to_remove_urn], + [downstream_field_to_remove_urn], + ) +) + +patch_mcps = patch_builder.build() + + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_field_add_glossary_term_patch.py b/metadata-ingestion/examples/library/dataset_field_add_glossary_term_patch.py new file mode 100644 index 0000000000000..3f8da2c143c92 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_field_add_glossary_term_patch.py @@ -0,0 +1,26 @@ +from datahub.emitter.mce_builder import make_dataset_urn, make_term_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.metadata.schema_classes import GlossaryTermAssociationClass +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn( + platform="snowflake", name="fct_users_created", env="PROD" +) + +# Create Dataset Patch to Add + Remove Term for 'profile_id' column +patch_builder = DatasetPatchBuilder(dataset_urn) 
+patch_builder.for_field("profile_id").add_term( + GlossaryTermAssociationClass(make_term_urn("term-to-add-id")) +) +patch_builder.for_field("profile_id").remove_term( + "urn:li:glossaryTerm:term-to-remove-id" +) +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) diff --git a/metadata-ingestion/examples/library/dataset_field_add_tag_patch.py b/metadata-ingestion/examples/library/dataset_field_add_tag_patch.py new file mode 100644 index 0000000000000..3075cac5320ae --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_field_add_tag_patch.py @@ -0,0 +1,24 @@ +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn +from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.metadata.schema_classes import TagAssociationClass +from datahub.specific.dataset import DatasetPatchBuilder + +# Create DataHub Client +datahub_client = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080")) + +# Create Dataset URN +dataset_urn = make_dataset_urn( + platform="snowflake", name="fct_users_created", env="PROD" +) + +# Create Dataset Patch to Add + Remove Tag for 'profile_id' column +patch_builder = DatasetPatchBuilder(dataset_urn) +patch_builder.for_field("profile_id").add_tag( + TagAssociationClass(make_tag_urn("tag-to-add-id")) +) +patch_builder.for_field("profile_id").remove_tag("urn:li:tag:tag-to-remove-id") +patch_mcps = patch_builder.build() + +# Emit Dataset Patch +for patch_mcp in patch_mcps: + datahub_client.emit(patch_mcp) From b7bb5ca7ee3e0e80c5f8ca1843e67671f779f27d Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Thu, 19 Dec 2024 10:20:06 -0800 Subject: [PATCH 05/11] feat(graphql/ml): Add custom properties to ml entities (#12152) --- .../types/mappers/EmbeddedModelMapper.java | 12 +++++++++++ .../mlmodel/mappers/MLFeatureMapper.java | 12 +++++++---- .../mappers/MLFeaturePropertiesMapper.java | 20 +++++++++++++------ 
.../mlmodel/mappers/MLFeatureTableMapper.java | 10 +++++----- .../MLFeatureTablePropertiesMapper.java | 18 ++++++++++------- .../mlmodel/mappers/MLModelGroupMapper.java | 11 ++++++---- .../mappers/MLModelGroupPropertiesMapper.java | 19 ++++++++++++------ .../mappers/MLModelPropertiesMapper.java | 12 ++++++----- .../mlmodel/mappers/MLPrimaryKeyMapper.java | 15 ++++++++------ .../mappers/MLPrimaryKeyPropertiesMapper.java | 19 ++++++++++++------ .../src/main/resources/entity.graphql | 12 ++++++++--- 11 files changed, 108 insertions(+), 52 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/EmbeddedModelMapper.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/EmbeddedModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/EmbeddedModelMapper.java new file mode 100644 index 0000000000000..62e7c90ab9b0e --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/EmbeddedModelMapper.java @@ -0,0 +1,12 @@ +package com.linkedin.datahub.graphql.types.mappers; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** Made for models that are embedded in other models and thus do not encode their own URN. 
*/ +public interface EmbeddedModelMapper { + O apply( + @Nullable final QueryContext context, @Nonnull final I input, @Nonnull final Urn entityUrn); +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java index d5eb1a15624dc..74076fd2f4ee9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java @@ -75,7 +75,8 @@ public MLFeature apply( mlFeature.setOwnership( OwnershipMapper.map(context, new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult( - context, ML_FEATURE_PROPERTIES_ASPECT_NAME, MLFeatureMapper::mapMLFeatureProperties); + ML_FEATURE_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> mapMLFeatureProperties(context, entity, dataMap, entityUrn)); mappingHelper.mapToResult( INSTITUTIONAL_MEMORY_ASPECT_NAME, (mlFeature, dataMap) -> @@ -138,10 +139,13 @@ private static void mapMLFeatureKey(@Nonnull MLFeature mlFeature, @Nonnull DataM private static void mapMLFeatureProperties( @Nullable final QueryContext context, @Nonnull MLFeature mlFeature, - @Nonnull DataMap dataMap) { + @Nonnull DataMap dataMap, + @Nonnull Urn entityUrn) { MLFeatureProperties featureProperties = new MLFeatureProperties(dataMap); - mlFeature.setFeatureProperties(MLFeaturePropertiesMapper.map(context, featureProperties)); - mlFeature.setProperties(MLFeaturePropertiesMapper.map(context, featureProperties)); + com.linkedin.datahub.graphql.generated.MLFeatureProperties graphqlProperties = + MLFeaturePropertiesMapper.map(context, featureProperties, entityUrn); + mlFeature.setFeatureProperties(graphqlProperties); + mlFeature.setProperties(graphqlProperties); mlFeature.setDescription(featureProperties.getDescription()); if 
(featureProperties.getDataType() != null) { mlFeature.setDataType(MLFeatureDataType.valueOf(featureProperties.getDataType().toString())); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeaturePropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeaturePropertiesMapper.java index 92d090275867d..08ac3a1b5f138 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeaturePropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeaturePropertiesMapper.java @@ -1,29 +1,34 @@ package com.linkedin.datahub.graphql.types.mlmodel.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.MLFeatureDataType; import com.linkedin.datahub.graphql.generated.MLFeatureProperties; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.NonNull; public class MLFeaturePropertiesMapper - implements ModelMapper { + implements EmbeddedModelMapper< + com.linkedin.ml.metadata.MLFeatureProperties, MLFeatureProperties> { public static final MLFeaturePropertiesMapper INSTANCE = new MLFeaturePropertiesMapper(); public static MLFeatureProperties map( @Nullable QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLFeatureProperties mlFeatureProperties) { - return INSTANCE.apply(context, mlFeatureProperties); + @Nonnull final com.linkedin.ml.metadata.MLFeatureProperties mlFeatureProperties, + @Nonnull Urn entityUrn) { + return INSTANCE.apply(context, 
mlFeatureProperties, entityUrn); } @Override public MLFeatureProperties apply( @Nullable QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLFeatureProperties mlFeatureProperties) { + @Nonnull final com.linkedin.ml.metadata.MLFeatureProperties mlFeatureProperties, + @Nonnull Urn entityUrn) { final MLFeatureProperties result = new MLFeatureProperties(); result.setDescription(mlFeatureProperties.getDescription()); @@ -45,6 +50,9 @@ public MLFeatureProperties apply( .collect(Collectors.toList())); } + result.setCustomProperties( + CustomPropertiesMapper.map(mlFeatureProperties.getCustomProperties(), entityUrn)); + return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java index 51d3004d97a61..65bc8e84f7bbb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java @@ -76,7 +76,7 @@ public MLFeatureTable apply( mappingHelper.mapToResult(ML_FEATURE_TABLE_KEY_ASPECT_NAME, this::mapMLFeatureTableKey); mappingHelper.mapToResult( ML_FEATURE_TABLE_PROPERTIES_ASPECT_NAME, - (entity, dataMap) -> this.mapMLFeatureTableProperties(context, entity, dataMap, entityUrn)); + (entity, dataMap) -> mapMLFeatureTableProperties(context, entity, dataMap, entityUrn)); mappingHelper.mapToResult( INSTITUTIONAL_MEMORY_ASPECT_NAME, (mlFeatureTable, dataMap) -> @@ -146,10 +146,10 @@ private static void mapMLFeatureTableProperties( @Nonnull DataMap dataMap, Urn entityUrn) { MLFeatureTableProperties featureTableProperties = new MLFeatureTableProperties(dataMap); - mlFeatureTable.setFeatureTableProperties( - MLFeatureTablePropertiesMapper.map(context, featureTableProperties, entityUrn)); - 
mlFeatureTable.setProperties( - MLFeatureTablePropertiesMapper.map(context, featureTableProperties, entityUrn)); + com.linkedin.datahub.graphql.generated.MLFeatureTableProperties graphqlProperties = + MLFeatureTablePropertiesMapper.map(context, featureTableProperties, entityUrn); + mlFeatureTable.setFeatureTableProperties(graphqlProperties); + mlFeatureTable.setProperties(graphqlProperties); mlFeatureTable.setDescription(featureTableProperties.getDescription()); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java index d9fed13ed0d0b..3c054cb6a9a5b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java @@ -8,26 +8,30 @@ import com.linkedin.datahub.graphql.generated.MLFeatureTableProperties; import com.linkedin.datahub.graphql.generated.MLPrimaryKey; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.NonNull; -public class MLFeatureTablePropertiesMapper { +public class MLFeatureTablePropertiesMapper + implements EmbeddedModelMapper< + com.linkedin.ml.metadata.MLFeatureTableProperties, MLFeatureTableProperties> { public static final MLFeatureTablePropertiesMapper INSTANCE = new MLFeatureTablePropertiesMapper(); public static MLFeatureTableProperties map( @Nullable final QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties, - Urn entityUrn) { + @Nonnull final 
com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties, + @Nonnull Urn entityUrn) { return INSTANCE.apply(context, mlFeatureTableProperties, entityUrn); } - public static MLFeatureTableProperties apply( + @Override + public MLFeatureTableProperties apply( @Nullable final QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties, - Urn entityUrn) { + @Nonnull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties, + @Nonnull Urn entityUrn) { final MLFeatureTableProperties result = new MLFeatureTableProperties(); result.setDescription(mlFeatureTableProperties.getDescription()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java index 6e3da1c153392..9009972a47616 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java @@ -75,9 +75,8 @@ public MLModelGroup apply( mappingHelper.mapToResult( ML_MODEL_GROUP_KEY_ASPECT_NAME, MLModelGroupMapper::mapToMLModelGroupKey); mappingHelper.mapToResult( - context, ML_MODEL_GROUP_PROPERTIES_ASPECT_NAME, - MLModelGroupMapper::mapToMLModelGroupProperties); + (entity, dataMap) -> mapToMLModelGroupProperties(context, entity, dataMap, entityUrn)); mappingHelper.mapToResult( STATUS_ASPECT_NAME, (mlModelGroup, dataMap) -> @@ -136,9 +135,13 @@ private static void mapToMLModelGroupKey(MLModelGroup mlModelGroup, DataMap data } private static void mapToMLModelGroupProperties( - @Nullable final QueryContext context, MLModelGroup mlModelGroup, DataMap dataMap) { + @Nullable final QueryContext context, + MLModelGroup mlModelGroup, + DataMap dataMap, + @Nonnull Urn entityUrn) { 
MLModelGroupProperties modelGroupProperties = new MLModelGroupProperties(dataMap); - mlModelGroup.setProperties(MLModelGroupPropertiesMapper.map(context, modelGroupProperties)); + mlModelGroup.setProperties( + MLModelGroupPropertiesMapper.map(context, modelGroupProperties, entityUrn)); if (modelGroupProperties.getDescription() != null) { mlModelGroup.setDescription(modelGroupProperties.getDescription()); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java index 9f1918f9ec489..a6cfded9865d9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java @@ -1,27 +1,31 @@ package com.linkedin.datahub.graphql.types.mlmodel.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.MLModelGroupProperties; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; +import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.NonNull; public class MLModelGroupPropertiesMapper - implements ModelMapper< + implements EmbeddedModelMapper< com.linkedin.ml.metadata.MLModelGroupProperties, MLModelGroupProperties> { public static final MLModelGroupPropertiesMapper INSTANCE = new MLModelGroupPropertiesMapper(); public static MLModelGroupProperties map( @Nullable QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLModelGroupProperties mlModelGroupProperties) { - return INSTANCE.apply(context, mlModelGroupProperties); 
+ @Nonnull final com.linkedin.ml.metadata.MLModelGroupProperties mlModelGroupProperties, + @Nonnull Urn entityUrn) { + return INSTANCE.apply(context, mlModelGroupProperties, entityUrn); } @Override public MLModelGroupProperties apply( @Nullable QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLModelGroupProperties mlModelGroupProperties) { + @Nonnull final com.linkedin.ml.metadata.MLModelGroupProperties mlModelGroupProperties, + @Nonnull Urn entityUrn) { final MLModelGroupProperties result = new MLModelGroupProperties(); result.setDescription(mlModelGroupProperties.getDescription()); @@ -30,6 +34,9 @@ public MLModelGroupProperties apply( } result.setCreatedAt(mlModelGroupProperties.getCreatedAt()); + result.setCustomProperties( + CustomPropertiesMapper.map(mlModelGroupProperties.getCustomProperties(), entityUrn)); + return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index a89904b3ab915..265005c2caa9e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -7,25 +7,27 @@ import com.linkedin.datahub.graphql.generated.MLModelGroup; import com.linkedin.datahub.graphql.generated.MLModelProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.NonNull; -public class MLModelPropertiesMapper { +public class MLModelPropertiesMapper + implements EmbeddedModelMapper { public static final MLModelPropertiesMapper INSTANCE = new 
MLModelPropertiesMapper(); public static MLModelProperties map( @Nullable final QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties, + @Nonnull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties, Urn entityUrn) { return INSTANCE.apply(context, mlModelProperties, entityUrn); } public MLModelProperties apply( @Nullable final QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties, - Urn entityUrn) { + @Nonnull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties, + @Nonnull Urn entityUrn) { final MLModelProperties result = new MLModelProperties(); result.setDate(mlModelProperties.getDate()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java index c446c892cb223..d48d93ede9c1a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java @@ -74,9 +74,8 @@ public MLPrimaryKey apply( mappingHelper.mapToResult( ML_PRIMARY_KEY_KEY_ASPECT_NAME, MLPrimaryKeyMapper::mapMLPrimaryKeyKey); mappingHelper.mapToResult( - context, ML_PRIMARY_KEY_PROPERTIES_ASPECT_NAME, - MLPrimaryKeyMapper::mapMLPrimaryKeyProperties); + (entity, dataMap) -> mapMLPrimaryKeyProperties(context, entity, dataMap, entityUrn)); mappingHelper.mapToResult( INSTITUTIONAL_MEMORY_ASPECT_NAME, (mlPrimaryKey, dataMap) -> @@ -132,11 +131,15 @@ private static void mapMLPrimaryKeyKey(MLPrimaryKey mlPrimaryKey, DataMap dataMa } private static void mapMLPrimaryKeyProperties( - @Nullable final QueryContext context, MLPrimaryKey mlPrimaryKey, DataMap dataMap) { + @Nullable final QueryContext context, + MLPrimaryKey mlPrimaryKey, + DataMap 
dataMap, + @Nonnull Urn entityUrn) { MLPrimaryKeyProperties primaryKeyProperties = new MLPrimaryKeyProperties(dataMap); - mlPrimaryKey.setPrimaryKeyProperties( - MLPrimaryKeyPropertiesMapper.map(context, primaryKeyProperties)); - mlPrimaryKey.setProperties(MLPrimaryKeyPropertiesMapper.map(context, primaryKeyProperties)); + com.linkedin.datahub.graphql.generated.MLPrimaryKeyProperties graphqlProperties = + MLPrimaryKeyPropertiesMapper.map(context, primaryKeyProperties, entityUrn); + mlPrimaryKey.setPrimaryKeyProperties(graphqlProperties); + mlPrimaryKey.setProperties(graphqlProperties); mlPrimaryKey.setDescription(primaryKeyProperties.getDescription()); if (primaryKeyProperties.getDataType() != null) { mlPrimaryKey.setDataType( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyPropertiesMapper.java index 09e41fe7ee4e8..0bbe8f53f3271 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyPropertiesMapper.java @@ -1,30 +1,34 @@ package com.linkedin.datahub.graphql.types.mlmodel.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.MLFeatureDataType; import com.linkedin.datahub.graphql.generated.MLPrimaryKeyProperties; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.NonNull; public class 
MLPrimaryKeyPropertiesMapper - implements ModelMapper< + implements EmbeddedModelMapper< com.linkedin.ml.metadata.MLPrimaryKeyProperties, MLPrimaryKeyProperties> { public static final MLPrimaryKeyPropertiesMapper INSTANCE = new MLPrimaryKeyPropertiesMapper(); public static MLPrimaryKeyProperties map( @Nullable QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLPrimaryKeyProperties mlPrimaryKeyProperties) { - return INSTANCE.apply(context, mlPrimaryKeyProperties); + @Nonnull final com.linkedin.ml.metadata.MLPrimaryKeyProperties mlPrimaryKeyProperties, + @Nonnull Urn entityUrn) { + return INSTANCE.apply(context, mlPrimaryKeyProperties, entityUrn); } @Override public MLPrimaryKeyProperties apply( @Nullable QueryContext context, - @NonNull final com.linkedin.ml.metadata.MLPrimaryKeyProperties mlPrimaryKeyProperties) { + @Nonnull final com.linkedin.ml.metadata.MLPrimaryKeyProperties mlPrimaryKeyProperties, + @Nonnull Urn entityUrn) { final MLPrimaryKeyProperties result = new MLPrimaryKeyProperties(); result.setDescription(mlPrimaryKeyProperties.getDescription()); @@ -45,6 +49,9 @@ public MLPrimaryKeyProperties apply( }) .collect(Collectors.toList())); + result.setCustomProperties( + CustomPropertiesMapper.map(mlPrimaryKeyProperties.getCustomProperties(), entityUrn)); + return result; } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 049527e5d77e3..926cd256a5c5a 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -9829,11 +9829,13 @@ type MLModelGroup implements EntityWithRelationships & Entity & BrowsableEntity type MLModelGroupProperties { -description: String + description: String createdAt: Long version: VersionTag + + customProperties: [CustomPropertiesEntry!] 
} """ @@ -10028,6 +10030,8 @@ type MLFeatureProperties { version: VersionTag sources: [Dataset] + + customProperties: [CustomPropertiesEntry!] } """ @@ -10164,13 +10168,15 @@ type MLPrimaryKey implements EntityWithRelationships & Entity { type MLPrimaryKeyProperties { -description: String + description: String dataType: MLFeatureDataType version: VersionTag sources: [Dataset] + + customProperties: [CustomPropertiesEntry!] } """ @@ -10347,7 +10353,7 @@ type MLModelGroupEditableProperties { type MLFeatureTableProperties { -description: String + description: String mlFeatures: [MLFeature] From 9762c46702dc4492d09a5810544dfa7922266fb1 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:41:44 -0600 Subject: [PATCH 06/11] chore(bump): ingestion-base & actions (#12171) --- docker/datahub-ingestion-base/build.gradle | 2 +- docker/datahub-ingestion/build.gradle | 2 +- docker/profiles/docker-compose.actions.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle index ef482de9256a3..f19faa227ca61 100644 --- a/docker/datahub-ingestion-base/build.gradle +++ b/docker/datahub-ingestion-base/build.gradle @@ -12,7 +12,7 @@ ext { docker_target = project.getProperties().getOrDefault("dockerTarget", "slim") docker_version = "${version}${docker_target == 'slim' ? '-slim' : ''}" - revision = 7 // increment to trigger rebuild + revision = 8 // increment to trigger rebuild } docker { diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index 113a6dcf0a1bd..b236a53c288f7 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -12,7 +12,7 @@ ext { docker_target = project.getProperties().getOrDefault("dockerTarget", "slim") docker_version = "${version}${docker_target == 'slim' ? 
'-slim' : ''}" - revision = 8 // increment to trigger rebuild + revision = 9 // increment to trigger rebuild } dependencies { diff --git a/docker/profiles/docker-compose.actions.yml b/docker/profiles/docker-compose.actions.yml index c2985f4299326..459fffdd8acf3 100644 --- a/docker/profiles/docker-compose.actions.yml +++ b/docker/profiles/docker-compose.actions.yml @@ -6,7 +6,7 @@ x-search-datastore-elasticsearch-env: &search-datastore-env x-datahub-actions-service: &datahub-actions-service hostname: actions - image: ${DATAHUB_ACTIONS_IMAGE:-${DATAHUB_ACTIONS_REPO:-acryldata}/datahub-actions}:${ACTIONS_VERSION:-v0.1.1} + image: ${DATAHUB_ACTIONS_IMAGE:-${DATAHUB_ACTIONS_REPO:-acryldata}/datahub-actions}:${ACTIONS_VERSION:-v0.1.6} env_file: - datahub-actions/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} From 45ace13fe26a9ae20ed9fcdd7df04bb7c197d52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Thu, 19 Dec 2024 20:20:42 +0100 Subject: [PATCH 07/11] feat(mssql): platform instance aspect for dataflow and datajob entities (#12180) --- .../ingestion/source/sql/mssql/job_models.py | 31 +- .../ingestion/source/sql/mssql/source.py | 14 + .../golden_mces_mssql_to_file.json | 756 ++++++++++++------ .../sql_server/source_files/mssql_to_file.yml | 1 + 4 files changed, 574 insertions(+), 228 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py index 5107a4e38f64d..d3941e7add0fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py @@ -1,11 +1,17 @@ from dataclasses import dataclass, field from typing import Dict, List, Optional, Union -from datahub.emitter.mce_builder import make_data_flow_urn, make_data_job_urn +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn, + 
make_data_platform_urn, + make_dataplatform_instance_urn, +) from datahub.metadata.schema_classes import ( DataFlowInfoClass, DataJobInfoClass, DataJobInputOutputClass, + DataPlatformInstanceClass, ) @@ -204,6 +210,18 @@ def as_datajob_info_aspect(self) -> DataJobInfoClass: status=self.status, ) + @property + def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]: + if self.entity.flow.platform_instance: + return DataPlatformInstanceClass( + platform=make_data_platform_urn(self.entity.flow.orchestrator), + instance=make_dataplatform_instance_urn( + platform=self.entity.flow.orchestrator, + instance=self.entity.flow.platform_instance, + ), + ) + return None + @dataclass class MSSQLDataFlow: @@ -238,3 +256,14 @@ def as_dataflow_info_aspect(self) -> DataFlowInfoClass: customProperties=self.flow_properties, externalUrl=self.external_url, ) + + @property + def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]: + if self.entity.platform_instance: + return DataPlatformInstanceClass( + platform=make_data_platform_urn(self.entity.orchestrator), + instance=make_dataplatform_instance_urn( + self.entity.orchestrator, self.entity.platform_instance + ), + ) + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 414c1faaa1661..9d8b67041998c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -639,6 +639,13 @@ def construct_job_workunits( aspect=data_job.as_datajob_info_aspect, ).as_workunit() + data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect + if data_platform_instance_aspect: + yield MetadataChangeProposalWrapper( + entityUrn=data_job.urn, + aspect=data_platform_instance_aspect, + ).as_workunit() + if include_lineage: yield MetadataChangeProposalWrapper( entityUrn=data_job.urn, 
@@ -654,6 +661,13 @@ def construct_flow_workunits( entityUrn=data_flow.urn, aspect=data_flow.as_dataflow_info_aspect, ).as_workunit() + + data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect + if data_platform_instance_aspect: + yield MetadataChangeProposalWrapper( + entityUrn=data_flow.urn, + aspect=data_platform_instance_aspect, + ).as_workunit() # TODO: Add SubType when it appear def get_inspectors(self) -> Iterable[Inspector]: diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index b67ebfb206883..b36188405e7e1 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -1,13 +1,14 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "entityUrn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData" }, @@ -23,7 +24,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "entityUrn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -39,12 +40,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "entityUrn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -55,7 
+57,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "entityUrn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -73,12 +75,17 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "entityUrn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + ] } }, "systemMetadata": { @@ -89,7 +96,7 @@ }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "dataFlowInfo", "aspect": { @@ -105,19 +112,36 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { "json": { "customProperties": { - "job_id": "c2d77890-83ba-435f-879b-1c77fa38dd47", + "job_id": "b8907be7-52f5-4df4-a870-f4fe0679ec45", "job_name": 
"Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-12-05 16:44:43.910000", - "date_modified": "2024-12-05 16:44:44.043000", + "date_created": "2024-12-19 12:34:45.843000", + "date_modified": "2024-12-19 12:34:46.017000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -138,7 +162,24 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -156,12 +197,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:5726a09b23f60be6f661206c879a3683", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -172,13 +213,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:5726a09b23f60be6f661206c879a3683", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", 
"database": "DemoData", "schema": "db_accessadmin" @@ -195,7 +237,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:5726a09b23f60be6f661206c879a3683", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -211,12 +253,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:5726a09b23f60be6f661206c879a3683", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -227,7 +270,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:5726a09b23f60be6f661206c879a3683", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -245,15 +288,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:5726a09b23f60be6f661206c879a3683", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -266,12 +313,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:5d8a64d9bc388814ac06d9a4d7a3ad22", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -282,13 +329,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:5d8a64d9bc388814ac06d9a4d7a3ad22", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_backupoperator" @@ -305,7 +353,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:5d8a64d9bc388814ac06d9a4d7a3ad22", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -321,12 +369,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:5d8a64d9bc388814ac06d9a4d7a3ad22", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -337,7 +386,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:5d8a64d9bc388814ac06d9a4d7a3ad22", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -355,15 +404,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:5d8a64d9bc388814ac06d9a4d7a3ad22", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -376,12 +429,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:d5f6914a2b8e0dd461f1ad02e7b28c11", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -392,13 +445,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:d5f6914a2b8e0dd461f1ad02e7b28c11", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_datareader" @@ -415,7 +469,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:d5f6914a2b8e0dd461f1ad02e7b28c11", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -431,12 +485,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:d5f6914a2b8e0dd461f1ad02e7b28c11", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -447,7 +502,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": 
"urn:li:container:d5f6914a2b8e0dd461f1ad02e7b28c11", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -465,15 +520,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:d5f6914a2b8e0dd461f1ad02e7b28c11", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -486,12 +545,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:e3f86c86f3794233740cad99cba0b854", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -502,13 +561,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:e3f86c86f3794233740cad99cba0b854", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_datawriter" @@ -525,7 +585,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:e3f86c86f3794233740cad99cba0b854", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -541,12 +601,13 @@ }, { "entityType": "container", - "entityUrn": 
"urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:e3f86c86f3794233740cad99cba0b854", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -557,7 +618,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:e3f86c86f3794233740cad99cba0b854", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -575,15 +636,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:e3f86c86f3794233740cad99cba0b854", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -596,12 +661,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:c978c9ed6c196412685945ad89f8fbd6", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -612,13 +677,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:c978c9ed6c196412685945ad89f8fbd6", "changeType": "UPSERT", "aspectName": 
"containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_ddladmin" @@ -635,7 +701,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:c978c9ed6c196412685945ad89f8fbd6", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -651,12 +717,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:c978c9ed6c196412685945ad89f8fbd6", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -667,7 +734,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:c978c9ed6c196412685945ad89f8fbd6", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -685,15 +752,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:c978c9ed6c196412685945ad89f8fbd6", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -706,12 +777,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": 
"urn:li:container:17749025f27ce9ebd6febcaa6a49d715", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -722,13 +793,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:17749025f27ce9ebd6febcaa6a49d715", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_denydatareader" @@ -745,7 +817,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:17749025f27ce9ebd6febcaa6a49d715", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -761,12 +833,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:17749025f27ce9ebd6febcaa6a49d715", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -777,7 +850,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:17749025f27ce9ebd6febcaa6a49d715", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -795,15 +868,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:17749025f27ce9ebd6febcaa6a49d715", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -816,12 +893,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:63c0518620c06ef7af76019fea52b862", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -832,13 +909,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:63c0518620c06ef7af76019fea52b862", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_denydatawriter" @@ -855,7 +933,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:63c0518620c06ef7af76019fea52b862", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -871,12 +949,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:63c0518620c06ef7af76019fea52b862", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -887,7 +966,7 @@ }, { "entityType": 
"container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:63c0518620c06ef7af76019fea52b862", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -905,15 +984,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:63c0518620c06ef7af76019fea52b862", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -926,12 +1009,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:c6e96aed010f9205f809c1ce9a530003", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -942,13 +1025,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:c6e96aed010f9205f809c1ce9a530003", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_owner" @@ -965,7 +1049,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:c6e96aed010f9205f809c1ce9a530003", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -981,12 
+1065,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:c6e96aed010f9205f809c1ce9a530003", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -997,7 +1082,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:c6e96aed010f9205f809c1ce9a530003", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1015,15 +1100,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:c6e96aed010f9205f809c1ce9a530003", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -1036,12 +1125,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:895216bb602fb0002beac82d96507acf", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -1052,13 +1141,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": 
"urn:li:container:895216bb602fb0002beac82d96507acf", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "db_securityadmin" @@ -1075,7 +1165,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:895216bb602fb0002beac82d96507acf", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1091,12 +1181,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:895216bb602fb0002beac82d96507acf", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -1107,7 +1198,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:895216bb602fb0002beac82d96507acf", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1125,15 +1216,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:895216bb602fb0002beac82d96507acf", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -1146,12 +1241,12 @@ }, { "entityType": 
"container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", + "entityUrn": "urn:li:container:92899b29bb814fdeb1186eb99139073f", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -1162,13 +1257,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", + "entityUrn": "urn:li:container:92899b29bb814fdeb1186eb99139073f", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "dbo" @@ -1185,7 +1281,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", + "entityUrn": "urn:li:container:92899b29bb814fdeb1186eb99139073f", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1201,12 +1297,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", + "entityUrn": "urn:li:container:92899b29bb814fdeb1186eb99139073f", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -1217,7 +1314,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", + "entityUrn": "urn:li:container:92899b29bb814fdeb1186eb99139073f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1235,15 +1332,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", + "entityUrn": "urn:li:container:92899b29bb814fdeb1186eb99139073f", "changeType": "UPSERT", "aspectName": 
"browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -1256,12 +1357,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.dbo.Products,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1" + "container": "urn:li:container:92899b29bb814fdeb1186eb99139073f" } }, "systemMetadata": { @@ -1273,7 +1374,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.dbo.Products,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1345,7 +1446,24 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.dbo.Products,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.dbo.Products,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1363,19 +1481,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.dbo.Products,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" }, { - "id": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", - "urn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1" + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" + }, + { + "id": "urn:li:container:92899b29bb814fdeb1186eb99139073f", + "urn": "urn:li:container:92899b29bb814fdeb1186eb99139073f" } ] } @@ -1388,12 +1510,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "entityUrn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -1404,13 +1526,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "entityUrn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "Foo" @@ -1427,7 +1550,7 @@ }, { 
"entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "entityUrn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1443,12 +1566,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "entityUrn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -1459,7 +1583,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "entityUrn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1477,15 +1601,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "entityUrn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -1498,12 +1626,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": 
"urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "container": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } }, "systemMetadata": { @@ -1515,7 +1643,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.age_dist,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1587,7 +1715,24 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1605,19 +1750,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" }, { - "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "urn": 
"urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" + }, + { + "id": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", + "urn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } ] } @@ -1630,12 +1779,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Items,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "container": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } }, "systemMetadata": { @@ -1647,7 +1796,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Items,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1720,7 +1869,24 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Items,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Items,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1738,19 +1904,23 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Items,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" }, { - "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "id": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", + "urn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } ] } @@ -1763,12 +1933,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "container": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } }, "systemMetadata": { @@ -1780,7 +1950,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1877,7 +2047,24 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD)", + "changeType": 
"UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1895,19 +2082,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" }, { - "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "id": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", + "urn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } ] } @@ -1920,12 +2111,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.SalesReason,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "container": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } }, "systemMetadata": 
{ @@ -1937,7 +2128,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.SalesReason,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -2012,12 +2203,12 @@ { "name": "FK_TempSales_SalesReason", "foreignFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD),ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD),ID)" ], "sourceFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD),TempID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.SalesReason,PROD),TempID)" ], - "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)" + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.Persons,PROD)" } ] } @@ -2033,7 +2224,24 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.SalesReason,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2051,19 +2259,23 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.SalesReason,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" }, { - "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "id": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", + "urn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } ] } @@ -2076,12 +2288,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "container": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } }, "systemMetadata": { @@ -2093,7 +2305,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -2103,8 +2315,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n", - "is_view": "True" + "is_view": "True", + 
"view_definition": "CREATE VIEW Foo.PersonsView AS SELECT * FROM Foo.Persons;\n" }, "name": "PersonsView", "tags": [] @@ -2192,7 +2404,24 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2210,7 +2439,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -2228,19 +2457,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" }, { - "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + "id": 
"urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" + }, + { + "id": "urn:li:container:6fbadfb496ee98718da210cc2fca1680", + "urn": "urn:li:container:6fbadfb496ee98718da210cc2fca1680" } ] } @@ -2253,7 +2486,7 @@ }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "dataFlowInfo", "aspect": { @@ -2269,9 +2502,26 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -2282,8 +2532,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-12-05 16:44:43.800000", - "date_modified": "2024-12-05 16:44:43.800000" + "date_created": "2024-12-19 12:34:45.660000", + "date_modified": "2024-12-19 12:34:45.660000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2300,7 +2550,24 @@ }, { "entityType": "dataJob", - "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -2310,8 +2577,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-12-05 16:44:43.803000", - "date_modified": "2024-12-05 16:44:43.803000" + "date_created": "2024-12-19 12:34:45.667000", + "date_modified": "2024-12-19 12:34:45.667000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -2326,14 +2593,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:5631370915311469374ef3cb5f0ebbf0", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -2344,13 +2628,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:5631370915311469374ef3cb5f0ebbf0", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "guest" @@ -2367,7 +2652,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:5631370915311469374ef3cb5f0ebbf0", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2383,12 +2668,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:5631370915311469374ef3cb5f0ebbf0", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -2399,7 +2685,7 @@ }, { 
"entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:5631370915311469374ef3cb5f0ebbf0", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2417,15 +2703,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:5631370915311469374ef3cb5f0ebbf0", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -2438,12 +2728,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:63c0319e212536168ec5b7dce2b7da2f", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -2454,13 +2744,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:63c0319e212536168ec5b7dce2b7da2f", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "INFORMATION_SCHEMA" @@ -2477,7 +2768,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:63c0319e212536168ec5b7dce2b7da2f", "changeType": "UPSERT", "aspectName": 
"status", "aspect": { @@ -2493,12 +2784,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:63c0319e212536168ec5b7dce2b7da2f", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -2509,7 +2801,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:63c0319e212536168ec5b7dce2b7da2f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2527,15 +2819,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:63c0319e212536168ec5b7dce2b7da2f", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -2548,12 +2844,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityUrn": "urn:li:container:b0e2ef63fa03ab69f77b60844124ec97", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } }, "systemMetadata": { @@ -2564,13 +2860,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + 
"entityUrn": "urn:li:container:b0e2ef63fa03ab69f77b60844124ec97", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "mssql", + "instance": "my-instance", "env": "PROD", "database": "DemoData", "schema": "sys" @@ -2587,7 +2884,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityUrn": "urn:li:container:b0e2ef63fa03ab69f77b60844124ec97", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2603,12 +2900,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityUrn": "urn:li:container:b0e2ef63fa03ab69f77b60844124ec97", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { "json": { - "platform": "urn:li:dataPlatform:mssql" + "platform": "urn:li:dataPlatform:mssql", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" } }, "systemMetadata": { @@ -2619,7 +2917,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityUrn": "urn:li:container:b0e2ef63fa03ab69f77b60844124ec97", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2637,15 +2935,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityUrn": "urn:li:container:b0e2ef63fa03ab69f77b60844124ec97", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mssql,my-instance)" + }, + { + "id": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a", + "urn": "urn:li:container:db8117ee3cc6397c503e7824ae3e0f6a" } ] } @@ -2658,7 +2960,7 @@ }, { "entityType": "dataset", - 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -2669,7 +2971,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,my-instance.demodata.foo.persons,PROD)", "type": "VIEW" } ] @@ -2683,7 +2985,7 @@ }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2699,7 +3001,7 @@ }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2715,7 +3017,7 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2731,7 +3033,7 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2747,7 +3049,7 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml index 40bef3ff104a3..e003ec39cd528 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml @@ -7,6 +7,7 @@ source: password: test!Password database: DemoData host_port: localhost:21433 + platform_instance: my-instance # use_odbc: True # uri_args: # driver: "ODBC Driver 17 for SQL Server" From acb76cd97c8fc104b5c26a438db862a8d5e87705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Thu, 19 Dec 2024 20:26:58 +0100 Subject: [PATCH 08/11] fix(tableau): prevents warning in case of site admin creator role (#12175) --- .../src/datahub/ingestion/source/tableau/tableau.py | 2 +- .../datahub/ingestion/source/tableau/tableau_constant.py | 4 +++- .../ingestion/source/tableau/tableau_server_wrapper.py | 8 ++++++-- .../ingestion/source/tableau/tableau_validation.py | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 6cc2220d90fd9..7838e5fa256b8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -645,7 +645,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None: # the site-role might be different on another site logged_in_user: UserInfo = UserInfo.from_server(server=server) - if not logged_in_user.is_site_administrator_explorer(): + if not logged_in_user.has_site_administrator_explorer_privileges(): 
report.warning( title=title, message=message, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py index ea0878143ef35..d69312f803021 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py @@ -82,4 +82,6 @@ SITE = "Site" IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql" SITE_PERMISSION = "sitePermission" -SITE_ROLE = "SiteAdministratorExplorer" +ROLE_SITE_ADMIN_EXPLORER = "SiteAdministratorExplorer" +ROLE_SITE_ADMIN_CREATOR = "SiteAdministratorCreator" +ROLE_SERVER_ADMIN = "ServerAdministrator" diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py index f309622d12b91..482140a227511 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py @@ -11,8 +11,12 @@ class UserInfo: site_role: str site_id: str - def is_site_administrator_explorer(self): - return self.site_role == c.SITE_ROLE + def has_site_administrator_explorer_privileges(self): + return self.site_role in [ + c.ROLE_SITE_ADMIN_EXPLORER, + c.ROLE_SITE_ADMIN_CREATOR, + c.ROLE_SERVER_ADMIN, + ] @staticmethod def from_server(server: Server) -> "UserInfo": diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py index 4a703faf6091b..4ec0e5ef01d3c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py @@ -28,7 +28,7 @@ def check_user_role( try: # TODO: Add check for `Enable Derived Permissions` - if not 
logged_in_user.is_site_administrator_explorer(): + if not logged_in_user.has_site_administrator_explorer_privileges(): capability_dict[c.SITE_PERMISSION] = CapabilityReport( capable=False, failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.", From eceb799e634aa19340dbfe9da51714311f401996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Fri, 20 Dec 2024 08:37:21 +0100 Subject: [PATCH 09/11] fix(tableau): restart server object when reauthenticating (#12182) Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/source/tableau/tableau.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 7838e5fa256b8..fadcb8ff8f396 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -896,10 +896,9 @@ def dataset_browse_prefix(self) -> str: return f"/{self.config.env.lower()}{self.no_env_browse_prefix}" def _re_authenticate(self): - tableau_auth: Union[ - TableauAuth, PersonalAccessTokenAuth - ] = self.config.get_tableau_auth(self.site_id) - self.server.auth.sign_in(tableau_auth) + # Sign-in again may not be enough because Tableau sometimes caches invalid sessions + # so we need to recreate the Tableau Server object + self.server = self.config.make_tableau_client(self.site_id) @property def site_content_url(self) -> Optional[str]: From 66df362c0f7f10f5f0230054977410c3f1eb688a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Fri, 20 Dec 2024 09:57:53 +0100 Subject: [PATCH 10/11] fix(dagster): support dagster v1.9.6 (#12189) --- .../src/datahub_dagster_plugin/client/dagster_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py index 2fdd0a41edf6c..a87f490f2d947 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py @@ -522,7 +522,7 @@ def generate_datajob( # Also, add datahub inputs/outputs if present in input/output metatdata. for input_def_snap in op_def_snap.input_def_snaps: job_property_bag[f"input.{input_def_snap.name}"] = str( - input_def_snap._asdict() + input_def_snap.__dict__ ) if Constant.DATAHUB_INPUTS in input_def_snap.metadata: datajob.inlets.extend( @@ -533,7 +533,7 @@ def generate_datajob( for output_def_snap in op_def_snap.output_def_snaps: job_property_bag[f"output_{output_def_snap.name}"] = str( - output_def_snap._asdict() + output_def_snap.__dict__ ) if ( Constant.DATAHUB_OUTPUTS in output_def_snap.metadata From 42d4254cdcc13b10e4955bfabff83bf09e56c0dd Mon Sep 17 00:00:00 2001 From: kevinkarchacryl Date: Fri, 20 Dec 2024 04:30:59 -0500 Subject: [PATCH 11/11] fix(graphql): add suspended to corpuserstatus (#12185) --- datahub-graphql-core/src/main/resources/entity.graphql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 926cd256a5c5a..e086273068ee5 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -3838,6 +3838,11 @@ enum CorpUserStatus { A User that has been provisioned and logged in """ ACTIVE + + """ + A user that has been suspended + """ + SUSPENDED } union ResolvedActor = CorpUser | CorpGroup