diff --git a/build.gradle b/build.gradle index 79a4ca9384d28..67968ce3ee290 100644 --- a/build.gradle +++ b/build.gradle @@ -398,6 +398,7 @@ subprojects { implementation("com.fasterxml.jackson.core:jackson-databind:$jacksonVersion") implementation("com.fasterxml.jackson.core:jackson-dataformat-cbor:$jacksonVersion") implementation(externalDependency.commonsIo) + implementation(externalDependency.protobuf) } } diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 06838f2e686bc..8f3da2050a9b7 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -113,6 +113,18 @@ module.exports = { id: "docs/automations/snowflake-tag-propagation", className: "saasOnly", }, + { + label: "AI Classification", + type: "doc", + id: "docs/automations/ai-term-suggestion", + className: "saasOnly", + }, + { + label: "AI Documentation", + type: "doc", + id: "docs/automations/ai-docs", + className: "saasOnly", + }, ], }, { diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index 6bb793a59a86e..c46aacde3a0cb 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -2,18 +2,16 @@ DataHub has several APIs to manipulate metadata on the platform. Here's the list of APIs and their pros and cons to help you choose the right one for your use case. 
-| API | Definition | Pros | Cons | -|--------------------------------------------------------------------------------|------------------------------------|------------------------------------------|-------------------------------------------------------------| -| **[Python SDK](/metadata-ingestion/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | -| **[Java SDK](/metadata-integration/java/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | -| **[GraphQL API](docs/api/graphql/getting-started.md)** | GraphQL interface | Intuitive; mirrors UI capabilities | Less flexible than SDKs; requires knowledge of GraphQL syntax | -| **[OpenAPI](docs/api/openapi/openapi-usage-guide.md)**
(Not Recommended) | Lower-level API for advanced users | | Generally not recommended for typical use cases | +| API | Definition | Pros | Cons | +| ---------------------------------------------------------- | ---------------------------------- | ---------------------------------------- | ----------------------------------------------------------------------- | +| **[Python SDK](/metadata-ingestion/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | +| **[Java SDK](/metadata-integration/java/as-a-library.md)** | SDK | Highly flexible, Good for bulk execution | Requires an understanding of the metadata change event | +| **[GraphQL API](docs/api/graphql/getting-started.md)** | GraphQL interface | Intuitive; mirrors UI capabilities | Less flexible than SDKs; requires knowledge of GraphQL syntax | +| **[OpenAPI](docs/api/openapi/openapi-usage-guide.md)** | Lower-level API for advanced users | Most powerful and flexible | Can be hard to use for straightforward use cases; no corresponding SDKs | In general, **Python and Java SDKs** are our most recommended tools for extending and customizing the behavior of your DataHub instance. We don't recommend using the **OpenAPI** directly, as it's more complex and less user-friendly than the other APIs. - - ## Python and Java SDK We offer an SDK for both Python and Java that provide full functionality when it comes to CRUD operations and any complex functionality you may want to build into DataHub. We recommend using the SDKs for most use cases. Here are the examples of how to use the SDKs: @@ -23,22 +21,22 @@ We offer an SDK for both Python and Java that provide full functionality when it - Creating custom metadata entities Learn more about the SDKs: + - **[Python SDK →](/metadata-ingestion/as-a-library.md)** - **[Java SDK →](/metadata-integration/java/as-a-library.md)** - ## GraphQL API The `graphql` API serves as the primary public API for the platform. 
It can be used to fetch and update metadata programatically in the language of your choice. Intended as a higher-level API that simplifies the most common operations. We recommend using the GraphQL API if you're getting started with DataHub since it's more user-friendly and straighfowrad. Here are some examples of how to use the GraphQL API: + - Search for datasets with conditions - Update a certain field of a dataset Learn more about the GraphQL API: -- **[GraphQL API →](docs/api/graphql/getting-started.md)** - +- **[GraphQL API →](docs/api/graphql/getting-started.md)** ## DataHub API Comparison @@ -47,59 +45,59 @@ Here's an overview of what each API can do. > Last Updated : Feb 16 2024 -| Feature | GraphQL | Python SDK | OpenAPI | -|------------------------------------|------------------------------------------------------------------------------|------------------------------------------------------------------------------|---------| -| Create a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | -| Delete a Dataset (Soft Delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Delete a Dataset (Hard Delete) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Search a Dataset | ✅ [[Guide]](/docs/how/search.md#graphql) | ✅ | ✅ | -| Read a Dataset Deprecation | ✅ | ✅ | ✅ | -| Read Dataset Entities (V2) | ✅ | ✅ | ✅ | -| Create a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ | -| Read a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ | -| Add Tags to a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ | -| Add Tags to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ 
[[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ | -| Remove Tags from a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#remove-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags#remove-tags) | ✅ | -| Create Glossary Terms | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ | -| Read Terms from a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ | -| Add Terms to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ | -| Add Terms to a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ | -| Create Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ | -| Read Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ | -| Add Domains to a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ | -| Remove Domains from a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ | -| Create / Upsert Users | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ | -| Create / Upsert Group | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ | -| Read Owners of a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ | -| Add Owner to a Dataset | ✅ 
[[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners#remove-owners) | ✅ | -| Remove Owner from a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | -| Add Lineage | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-lineage) | ✅ | -| Add Column Level (Fine Grained) Lineage | 🚫 | ✅ [[Guide]](docs/api/tutorials/lineage.md#add-column-level-lineage) | ✅ | -| Add Documentation (Description) to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ | -| Add Documentation (Description) to a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ | -| Add / Remove / Replace Custom Properties on a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ | -| Add ML Feature to ML Feature Table | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlfeaturetable) | ✅ | -| Add ML Feature to MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlmodel) | ✅ | -| Add ML Group to MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlgroup-to-mlfeaturetable) | ✅ | -| Create MLFeature | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeature) | ✅ | -| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | -| Create MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodel) | ✅ | -| Create MLModelGroup | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodelgroup) | ✅ | -| Create MLPrimaryKey | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlprimarykey) | ✅ | -| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable)| ✅ | -| Read MLFeature | ✅ 
[[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ | -| Read MLFeatureTable | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ | -| Read MLModel | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ | -| Read MLModelGroup | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ | -| Read MLPrimaryKey | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ | -| Create Data Product | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_dataproduct.py) | ✅ | -| Create Lineage Between Chart and Dashboard | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_chart_dashboard.py) | ✅ | -| Create Lineage Between Dataset and Chart | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_chart.py) | ✅ | -| Create Lineage Between Dataset and DataJob | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) | ✅ | -| Create Finegrained Lineage as DataJob for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py) | ✅ | -| Create Finegrained Lineage for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) | ✅ | -| Create Dataset Lineage with Kafka | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_kafka.py) | ✅ | -| Create 
Dataset Lineage with MCPW & Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) | ✅ | -| Create Dataset Lineage with Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py) | ✅ | -| Create DataJob with Dataflow | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow.py) [[Simple]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py) [[Verbose]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py) | ✅ | -| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | +| Feature | GraphQL | Python SDK | OpenAPI | +| -------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Create a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | +| Delete a Dataset (Soft Delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | +| Delete a Dataset (Hard Delete) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | +| Search a Dataset | ✅ 
[[Guide]](/docs/how/search.md#graphql) | ✅ | ✅ | +| Read a Dataset Deprecation | ✅ | ✅ | ✅ | +| Read Dataset Entities (V2) | ✅ | ✅ | ✅ | +| Create a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ | +| Read a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ | +| Add Tags to a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ | +| Add Tags to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ | +| Remove Tags from a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#remove-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags#remove-tags) | ✅ | +| Create Glossary Terms | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ | +| Read Terms from a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ | +| Add Terms to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ | +| Add Terms to a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ | +| Create Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ | +| Read Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ | +| Add Domains to a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ | 
+| Remove Domains from a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ | +| Create / Upsert Users | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ | +| Create / Upsert Group | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ | +| Read Owners of a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ | +| Add Owner to a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners#remove-owners) | ✅ | +| Remove Owner from a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | +| Add Lineage | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-lineage) | ✅ | +| Add Column Level (Fine Grained) Lineage | 🚫 | ✅ [[Guide]](docs/api/tutorials/lineage.md#add-column-level-lineage) | ✅ | +| Add Documentation (Description) to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ | +| Add Documentation (Description) to a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ | +| Add / Remove / Replace Custom Properties on a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ | +| Add ML Feature to ML Feature Table | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlfeaturetable) | ✅ | +| Add ML Feature to MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlmodel) | ✅ | +| Add ML Group to MLFeatureTable | 🚫 | ✅ 
[[Guide]](/docs/api/tutorials/ml.md#add-mlgroup-to-mlfeaturetable) | ✅ | +| Create MLFeature | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeature) | ✅ | +| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | +| Create MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodel) | ✅ | +| Create MLModelGroup | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodelgroup) | ✅ | +| Create MLPrimaryKey | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlprimarykey) | ✅ | +| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | +| Read MLFeature | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ | +| Read MLFeatureTable | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ | +| Read MLModel | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ | +| Read MLModelGroup | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ | +| Read MLPrimaryKey | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ | +| Create Data Product | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_dataproduct.py) | ✅ | +| Create Lineage Between Chart and Dashboard | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_chart_dashboard.py) | ✅ | +| Create Lineage Between Dataset and Chart | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_chart.py) | ✅ | +| Create Lineage Between Dataset and DataJob | 🚫 | ✅ 
[[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) | ✅ | +| Create Finegrained Lineage as DataJob for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py) | ✅ | +| Create Finegrained Lineage for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) | ✅ | +| Create Dataset Lineage with Kafka | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_kafka.py) | ✅ | +| Create Dataset Lineage with MCPW & Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) | ✅ | +| Create Dataset Lineage with Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py) | ✅ | +| Create DataJob with Dataflow | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow.py) [[Simple]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py) [[Verbose]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py) | ✅ | +| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | diff --git a/docs/api/tutorials/custom-assertions.md b/docs/api/tutorials/custom-assertions.md index 6544efb8809c2..47975c5739464 100644 --- a/docs/api/tutorials/custom-assertions.md +++ b/docs/api/tutorials/custom-assertions.md @@ -265,7 +265,7 @@ query getAssertion { 
customType # Will be your custom type. description lastUpdated { - time + time actor } customAssertion { @@ -282,6 +282,18 @@ query getAssertion { } } } + # Fetch what entities have the assertion attached to it + relationships(input: { + types: ["Asserts"] + direction: OUTGOING + }) { + total + relationships { + entity { + urn + } + } + } } } ``` diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index 00e992f2bd0bb..9b18aa922290b 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -532,6 +532,50 @@ Or you can run the following command to view the properties associated with the datahub dataset get --urn {urn} ``` +## Read Structured Properties From a Dataset + +For reading all structured properties from a dataset: + + + + +```graphql +query getDataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.ecommerce.customer,PROD)") { + structuredProperties { + properties { + structuredProperty { + urn + type + definition { + displayName + description + allowedValues { + description + } + } + } + values { + ... on StringValue { + stringValue + } + ... on NumberValue { + numberValue + } + } + valueEntities { + urn + type + } + } + } + } +} +``` + + + + ## Remove Structured Properties From a Dataset For removing a structured property or list of structured properties from a dataset: @@ -1733,4 +1777,4 @@ Example Response: ``` - \ No newline at end of file + diff --git a/docs/automations/ai-docs.md b/docs/automations/ai-docs.md new file mode 100644 index 0000000000000..bbec33f3bcae6 --- /dev/null +++ b/docs/automations/ai-docs.md @@ -0,0 +1,36 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# AI Documentation + + + +:::info + +This feature is currently in closed beta. Reach out to your Acryl representative to get access. 
+ +::: + +With AI-powered documentation, you can automatically generate documentation for tables and columns. + +

+ +

+ +## Configuring + +No configuration is required - just hit "Generate" on any table or column in the UI. + +## How it works + +Generating good documentation requires a holistic understanding of the data. Information we take into account includes, but is not limited to: + +- Dataset name and any existing documentation +- Column name, type, description, and sample values +- Lineage relationships to upstream and downstream assets +- Metadata about other related assets + +Data privacy: Your metadata is not sent to any third-party LLMs. We use AWS Bedrock internally, which means all metadata remains within the Acryl AWS account. We do not fine-tune on customer data. + +## Limitations + +- This feature is powered by an LLM, which can produce inaccurate results. While we've taken steps to reduce the likelihood of hallucinations, they can still occur. diff --git a/docs/automations/ai-term-suggestion.md b/docs/automations/ai-term-suggestion.md new file mode 100644 index 0000000000000..27d1716cfc372 --- /dev/null +++ b/docs/automations/ai-term-suggestion.md @@ -0,0 +1,72 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# AI Glossary Term Suggestions + + + +:::info + +This feature is currently in closed beta. Reach out to your Acryl representative to get access. + +::: + +The AI Glossary Term Suggestion automation uses LLMs to suggest [Glossary Terms](../glossary/business-glossary.md) for tables and columns in your data. + +This is useful for improving coverage of glossary terms across your organization, which is important for compliance and governance efforts. + +This automation: + +- Automatically suggests glossary terms for tables and columns. +- Goes beyond a predefined set of terms and works with your business glossary. +- Generates [proposals](../managed-datahub/approval-workflows.md) for owners to review, or can automatically add terms to tables/columns. 
+- Automatically adjusts to human-provided feedback and curation (coming soon). + +## Prerequisites + +- A business glossary with terms defined. Additional metadata, like documentation and existing term assignments, will improve the accuracy of our suggestions. + +## Configuring + +1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. + +

+ +

+ +2. **Create the Automation**: Click on 'Create' and select 'AI Glossary Term Suggestions'. + +

+ +

+ +3. **Configure the Automation**: Fill in the required fields to configure the automation. + The main fields to configure are (1) what terms to use for suggestions and (2) what entities to generate suggestions for. + +

+ +

+ +4. Once it's enabled, that's it! You'll start to see terms show up in the UI, either on assets or in the proposals page. + +

+ +

+ +## How it works + +The automation will scan through all the datasets matched by the configured filters. For each one, it will generate suggestions. +If new entities are added that match the configured filters, those will also be classified within 24 hours. + +We take into account the following metadata when generating suggestions: + +- Dataset name and description +- Column name, type, description, and sample values +- Glossary term name, documentation, and hierarchy +- Feedback loop: existing assignments and accepted/rejected proposals (coming soon) + +Data privacy: Your metadata is not sent to any third-party LLMs. We use AWS Bedrock internally, which means all metadata remains within the Acryl AWS account. We do not fine-tune on customer data. + +## Limitations + +- A single configured automation can classify at most 10k entities. +- We cannot do partial reclassification. If you add a new column to an existing table, we won't regenerate suggestions for that table. diff --git a/docs/automations/snowflake-tag-propagation.md b/docs/automations/snowflake-tag-propagation.md index bdc80376dfb48..c708e40cbdd81 100644 --- a/docs/automations/snowflake-tag-propagation.md +++ b/docs/automations/snowflake-tag-propagation.md @@ -1,4 +1,3 @@ - import FeatureAvailability from '@site/src/components/FeatureAvailability'; # Snowflake Tag Propagation Automation @@ -20,22 +19,22 @@ both columns and tables back to Snowflake. This automation is available in DataH 1. **Navigate to Automations**: Click on 'Govern' > 'Automations' in the navigation bar. -

- +

+

2. **Create An Automation**: Click on 'Create' and select 'Snowflake Tag Propagation'. -

- +

+

-3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. -Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically -propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation +3. **Configure Automation**: Fill in the required fields to connect to Snowflake, along with the name, description, and category. + Note that you can limit propagation based on specific Tags and Glossary Terms. If none are selected, then ALL Tags or Glossary Terms will be automatically + propagated to Snowflake tables and columns. Finally, click 'Save and Run' to start the automation. -

- +

+

## Propagating for Existing Assets @@ -46,13 +45,13 @@ Note that it may take some time to complete the initial back-filling process, de To do so, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu

- +

and then click "Initialize".

- +

This one-time step will kick off the back-filling process for existing descriptions. If you only want to begin propagating @@ -68,21 +67,21 @@ that you no longer want propagated descriptions to be visible. To do this, navigate to the Automation you created in Step 3 above, click the 3-dot "More" menu

- +

and then click "Rollback".

- +

This one-time step will remove all propagated tags and glossary terms from Snowflake. To simply stop propagating new tags, you can disable the automation. ## Viewing Propagated Tags -You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. +You can view propagated Tags (and corresponding DataHub URNs) inside the Snowflake UI to confirm the automation is working as expected. -

- +

+

diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index aca6d30619ea8..35f2ff862e695 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -132,7 +132,7 @@ conn_id = datahub_rest_default # or datahub_kafka_default ``` | Name | Default value | Description | -| -------------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +|----------------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | enabled | true | If the plugin should be enabled. | | conn_id | datahub_rest_default | The name of the datahub connection you set in step 1. | | cluster | prod | name of the airflow cluster | @@ -145,6 +145,7 @@ conn_id = datahub_rest_default # or datahub_kafka_default | datajob_url_link | taskinstance | If taskinstance, the datajob url will be taskinstance link on airflow. It can also be grid. | | | | graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. | +| dag_filter_str | { "allow": [".*"] } | AllowDenyPattern, given as a JSON string, that selects which DAGs the plugin emits metadata for. DAGs that are filtered out still run in Airflow; the plugin just skips them. 
| #### Validate that the plugin is working diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index 8deba22a107ce..c4964712cf9f7 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -3,7 +3,8 @@ import datahub.emitter.mce_builder as builder from airflow.configuration import conf -from datahub.configuration.common import ConfigModel +from datahub.configuration.common import AllowDenyPattern, ConfigModel +from pydantic.fields import Field if TYPE_CHECKING: from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook @@ -56,6 +57,11 @@ class DatahubLineageConfig(ConfigModel): # Makes extraction of jinja-templated fields more accurate. render_templates: bool = True + dag_filter_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="regex patterns for DAGs to ingest", + ) + log_level: Optional[str] = None debug_emitter: bool = False @@ -93,6 +99,9 @@ def get_lineage_config() -> DatahubLineageConfig: datajob_url_link = conf.get( "datahub", "datajob_url_link", fallback=DatajobUrl.TASKINSTANCE.value ) + dag_filter_pattern = AllowDenyPattern.parse_raw( + conf.get("datahub", "dag_filter_str", fallback='{"allow": [".*"]}') + ) return DatahubLineageConfig( enabled=enabled, @@ -109,4 +118,5 @@ def get_lineage_config() -> DatahubLineageConfig: disable_openlineage_plugin=disable_openlineage_plugin, datajob_url_link=datajob_url_link, render_templates=render_templates, + dag_filter_pattern=dag_filter_pattern, ) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index b818b76de9f7f..c1d5b306f187d 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -383,9 +383,13 @@ def on_task_instance_running( return logger.debug( - f"DataHub listener got notification about task instance start for {task_instance.task_id}" + f"DataHub listener got notification about task instance start for {task_instance.task_id} of dag {task_instance.dag_id}" ) + if not self.config.dag_filter_pattern.allowed(task_instance.dag_id): + logger.debug(f"DAG {task_instance.dag_id} is not allowed by the pattern") + return + if self.config.render_templates: task_instance = _render_templates(task_instance) @@ -492,6 +496,10 @@ def on_task_instance_finish( dag: "DAG" = task.dag # type: ignore[assignment] + if not self.config.dag_filter_pattern.allowed(dag.dag_id): + logger.debug(f"DAG {dag.dag_id} is not allowed by the pattern") + return + datajob = AirflowGenerator.generate_datajob( cluster=self.config.cluster, task=task, @@ -689,8 +697,12 @@ def on_dag_run_running(self, dag_run: "DagRun", msg: str) -> None: f"DataHub listener got notification about dag run start for {dag_run.dag_id}" ) - self.on_dag_start(dag_run) + assert dag_run.dag_id + if not self.config.dag_filter_pattern.allowed(dag_run.dag_id): + logger.debug(f"DAG {dag_run.dag_id} is not allowed by the pattern") + return + self.on_dag_start(dag_run) self.emitter.flush() # TODO: Add hooks for on_dag_run_success, on_dag_run_failed -> call AirflowGenerator.complete_dataflow diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py new file mode 100644 index 0000000000000..a805a2219d142 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/dag_to_skip.py @@ -0,0 +1,34 @@ +from datetime import datetime + +from airflow import DAG +from airflow.operators.bash import 
BashOperator + +from datahub_airflow_plugin.entities import Dataset, Urn + +with DAG( + "dag_to_skip", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + task1 = BashOperator( + task_id="dag_to_skip_task_1", + dag=dag, + bash_command="echo 'dag_to_skip_task_1'", + inlets=[ + Dataset(platform="snowflake", name="mydb.schema.tableA"), + Urn( + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ), + Urn("urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)"), + ], + outlets=[Dataset("snowflake", "mydb.schema.tableD")], + ) + + task2 = BashOperator( + task_id="dag_to_skip_task_2", + dag=dag, + bash_command="echo 'dag_to_skip_task_2'", + ) + + task1 >> task2 diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 37cd3b792d535..44efd94f834b1 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -33,6 +33,8 @@ DAGS_FOLDER = pathlib.Path(__file__).parent / "dags" GOLDENS_FOLDER = pathlib.Path(__file__).parent / "goldens" +DAG_TO_SKIP_INGESTION = "dag_to_skip" + @dataclasses.dataclass class AirflowInstance: @@ -140,6 +142,7 @@ def _run_airflow( # Configure the datahub plugin and have it write the MCPs to a file. 
"AIRFLOW__CORE__LAZY_LOAD_PLUGINS": "False" if is_v1 else "True", "AIRFLOW__DATAHUB__CONN_ID": datahub_connection_name, + "AIRFLOW__DATAHUB__DAG_FILTER_STR": f'{{ "deny": ["{DAG_TO_SKIP_INGESTION}"] }}', f"AIRFLOW_CONN_{datahub_connection_name.upper()}": Connection( conn_id="datahub_file_default", conn_type="datahub-file", @@ -276,6 +279,7 @@ class DagTestCase: test_cases = [ DagTestCase("simple_dag"), DagTestCase("basic_iolets"), + DagTestCase("dag_to_skip", v2_only=True), DagTestCase("snowflake_operator", success=False, v2_only=True), DagTestCase("sqlite_operator", v2_only=True), DagTestCase("custom_operator_dag", v2_only=True), @@ -373,20 +377,24 @@ def test_airflow_plugin( print("Sleeping for a few seconds to let the plugin finish...") time.sleep(10) - _sanitize_output_file(airflow_instance.metadata_file) - - check_golden_file( - pytestconfig=pytestconfig, - output_path=airflow_instance.metadata_file, - golden_path=golden_path, - ignore_paths=[ - # TODO: If we switched to Git urls, maybe we could get this to work consistently. - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['datahub_sql_parser_error'\]", - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]", - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['log_url'\]", - r"root\[\d+\]\['aspect'\]\['json'\]\['externalUrl'\]", - ], - ) + if dag_id == DAG_TO_SKIP_INGESTION: + # Verify that no MCPs were generated. + assert not os.path.exists(airflow_instance.metadata_file) + else: + _sanitize_output_file(airflow_instance.metadata_file) + + check_golden_file( + pytestconfig=pytestconfig, + output_path=airflow_instance.metadata_file, + golden_path=golden_path, + ignore_paths=[ + # TODO: If we switched to Git urls, maybe we could get this to work consistently. 
+ r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['datahub_sql_parser_error'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['log_url'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['externalUrl'\]", + ], + ) def _sanitize_output_file(output_path: pathlib.Path) -> None: diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md index b581e5fc8f70d..f2745d5e77f49 100644 --- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md +++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md @@ -18,9 +18,11 @@ | `Report.webUrl` | `Chart.externalUrl` | | `Workspace` | `Container` | | `Report` | `Dashboard` | +| `PaginatedReport` | `Dashboard` | | `Page` | `Chart` | -If Tile is created from report then Chart.externalUrl is set to Report.webUrl. +- If `Tile` is created from report then `Chart.externalUrl` is set to Report.webUrl. +- The `Page` is unavailable for PowerBI PaginatedReport. ## Lineage diff --git a/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py b/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py index 2b730ccb86f51..1556a67a9e555 100644 --- a/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py +++ b/metadata-ingestion/src/datahub/api/entities/platformresource/platform_resource.py @@ -186,10 +186,17 @@ def to_datahub(self, graph_client: DataHubGraph) -> None: def from_datahub( cls, graph_client: DataHubGraph, key: Union[PlatformResourceKey, str] ) -> Optional["PlatformResource"]: + """ + Fetches a PlatformResource from the graph given a key. + Key can be either a PlatformResourceKey object or an urn string. + Returns None if the resource is not found. 
+ """ if isinstance(key, PlatformResourceKey): urn = PlatformResourceUrn(id=key.id) else: urn = PlatformResourceUrn.from_string(key) + if not graph_client.exists(str(urn)): + return None platform_resource = graph_client.get_entity_semityped(str(urn)) return cls( id=urn.id, diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 85ae17ddf6529..586b1c610dc75 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -37,6 +37,7 @@ from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, auto_fix_duplicate_schema_field_paths, + auto_fix_empty_field_paths, auto_lowercase_urns, auto_materialize_referenced_tags_terms, auto_status_aspect, @@ -444,6 +445,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: partial( auto_fix_duplicate_schema_field_paths, platform=self._infer_platform() ), + partial(auto_fix_empty_field_paths, platform=self._infer_platform()), browse_path_processor, partial(auto_workunit_reporter, self.get_report()), auto_patch_last_modified, diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 372aef707f232..748d8a8e52a79 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -394,6 +394,50 @@ def auto_fix_duplicate_schema_field_paths( ) +def auto_fix_empty_field_paths( + stream: Iterable[MetadataWorkUnit], + *, + platform: Optional[str] = None, +) -> Iterable[MetadataWorkUnit]: + """Count schema metadata aspects with empty field paths and emit telemetry.""" + + total_schema_aspects = 0 + schemas_with_empty_fields = 0 + empty_field_paths = 0 + + for wu in stream: + schema_metadata = wu.get_aspect_of_type(SchemaMetadataClass) + if schema_metadata: + total_schema_aspects += 1 + + 
updated_fields: List[SchemaFieldClass] = [] + for field in schema_metadata.fields: + if field.fieldPath: + updated_fields.append(field) + else: + empty_field_paths += 1 + + if empty_field_paths > 0: + logger.info( + f"Fixing empty field paths in schema aspect for {wu.get_urn()} by dropping empty fields" + ) + schema_metadata.fields = updated_fields + schemas_with_empty_fields += 1 + + yield wu + + if schemas_with_empty_fields > 0: + properties = { + "platform": platform, + "total_schema_aspects": total_schema_aspects, + "schemas_with_empty_fields": schemas_with_empty_fields, + "empty_field_paths": empty_field_paths, + } + telemetry.telemetry_instance.ping( + "ingestion_empty_schema_field_paths", properties + ) + + def auto_empty_dataset_usage_statistics( stream: Iterable[MetadataWorkUnit], *, diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index b9b0ed556e66c..e8fae6254ae88 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -559,8 +559,10 @@ def get_entity_as_mcps( post_json_obj = post_json_transform(aspect_json) aspect_value = aspect_type.from_obj(post_json_obj["value"]) - system_metadata_raw = post_json_obj["systemMetadata"] - system_metadata = SystemMetadataClass.from_obj(system_metadata_raw) + system_metadata_raw = post_json_obj.get("systemMetadata") + system_metadata = None + if system_metadata_raw: + system_metadata = SystemMetadataClass.from_obj(system_metadata_raw) mcpw = MetadataChangeProposalWrapper( entityUrn=entity_urn, @@ -590,7 +592,7 @@ def get_entity_semityped( not be present in the dictionary. The entity's key aspect will always be present. 
""" - mcps = self.get_entity_as_mcps(entity_urn, aspects) + mcps = self.get_entity_as_mcps(entity_urn, aspects=aspects) result: AspectBag = {} for mcp in mcps: diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 86c1c8db11b05..b6aa8c1f5f1f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -14,7 +14,6 @@ class DatasetSubTypes(StrEnum): ELASTIC_DATASTREAM = "Datastream" SALESFORCE_CUSTOM_OBJECT = "Custom Object" SALESFORCE_STANDARD_OBJECT = "Object" - POWERBI_DATASET_TABLE = "PowerBI Dataset Table" QLIK_DATASET = "Qlik Dataset" BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot" SHARDED_TABLE = "Sharded Table" @@ -48,8 +47,8 @@ class BIContainerSubTypes(StrEnum): LOOKML_PROJECT = "LookML Project" LOOKML_MODEL = "LookML Model" TABLEAU_WORKBOOK = "Workbook" - POWERBI_WORKSPACE = "Workspace" - POWERBI_DATASET = "PowerBI Dataset" + POWERBI_DATASET = "Semantic Model" + POWERBI_DATASET_TABLE = "Table" QLIK_SPACE = "Qlik Space" QLIK_APP = "Qlik App" SIGMA_WORKSPACE = "Sigma Workspace" diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index 6a52d8fdd8905..98133ca69011e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -223,15 +223,14 @@ def ingest_table( ) customProperties = { - "number_of_files": str(get_file_count(delta_table)), "partition_columns": str(delta_table.metadata().partition_columns), "table_creation_time": str(delta_table.metadata().created_time), "id": str(delta_table.metadata().id), "version": str(delta_table.version()), "location": self.source_config.complete_path, } - if not self.source_config.require_files: - del 
customProperties["number_of_files"] # always 0 + if self.source_config.require_files: + customProperties["number_of_files"] = str(get_file_count(delta_table)) dataset_properties = DatasetPropertiesClass( description=delta_table.metadata().description, diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index b5caa83b2ff37..d8c6c03ce81e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -182,6 +182,7 @@ def _create_iceberg_workunit( custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id) custom_properties["manifest-list"] = table.current_snapshot().manifest_list dataset_properties = DatasetPropertiesClass( + name=table.name()[-1], tags=[], description=table.metadata.properties.get("comment", None), customProperties=custom_properties, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 967dd5d81112d..522639a160781 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass, field as dataclass_field from enum import Enum -from typing import Dict, List, Optional, Union +from typing import Dict, List, Literal, Optional, Union import pydantic from pydantic import validator @@ -47,6 +47,7 @@ class Constant: WORKSPACE_ID = "workspaceId" DASHBOARD_ID = "powerbi.linkedin.com/dashboards/{}" DATASET_EXECUTE_QUERIES = "DATASET_EXECUTE_QUERIES_POST" + GET_WORKSPACE_APP = "GET_WORKSPACE_APP" DATASET_ID = "datasetId" REPORT_ID = "reportId" SCAN_ID = "ScanId" @@ -118,6 +119,15 @@ class Constant: CHART_COUNT = "chartCount" WORKSPACE_NAME = "workspaceName" DATASET_WEB_URL = "datasetWebUrl" + TYPE = "type" + 
REPORT_TYPE = "reportType" + LAST_UPDATE = "lastUpdate" + APP_ID = "appId" + REPORTS = "reports" + ORIGINAL_REPORT_OBJECT_ID = "originalReportObjectId" + APP_SUB_TYPE = "App" + STATE = "state" + ACTIVE = "Active" @dataclass @@ -273,7 +283,8 @@ class PowerBiDashboardSourceConfig( # PowerBi workspace identifier workspace_id_pattern: AllowDenyPattern = pydantic.Field( default=AllowDenyPattern.allow_all(), - description="Regex patterns to filter PowerBI workspaces in ingestion", + description="Regex patterns to filter PowerBI workspaces in ingestion." + " Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.", ) # Dataset type mapping PowerBI support many type of data-sources. Here user need to define what type of PowerBI @@ -340,7 +351,7 @@ class PowerBiDashboardSourceConfig( ) modified_since: Optional[str] = pydantic.Field( default=None, - description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludePersonalWorkspaces and excludeInActiveWorkspaces limit to last 30 days", + description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludeInActiveWorkspaces limit to last 30 days", ) extract_dashboards: bool = pydantic.Field( default=True, @@ -445,6 +456,16 @@ class PowerBiDashboardSourceConfig( description="Patch dashboard metadata", ) + workspace_type_filter: List[ + Literal[ + "Workspace", "PersonalGroup", "Personal", "AdminWorkspace", "AdminInsights" + ] + ] = pydantic.Field( + default=["Workspace"], + description="Ingest the metadata of the workspace where the workspace type corresponds to the specified workspace_type_filter." + " Note: This field works in conjunction with 'workspace_id_pattern'. 
Both must be matched for a workspace to be processed.", + ) + @root_validator(skip_on_failure=True) def validate_extract_column_level_lineage(cls, values: Dict) -> Dict: flags = [ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 065bbac9e9645..f5c0aedb329cd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -34,7 +34,6 @@ from datahub.ingestion.source.common.subtypes import ( BIAssetSubTypes, BIContainerSubTypes, - DatasetSubTypes, ) from datahub.ingestion.source.powerbi.config import ( Constant, @@ -142,9 +141,7 @@ def assets_urn_to_lowercase(self, value): def new_mcp( self, - entity_type, entity_urn, - aspect_name, aspect, change_type=ChangeTypeClass.UPSERT, ): @@ -152,10 +149,8 @@ def new_mcp( Create MCP """ return MetadataChangeProposalWrapper( - entityType=entity_type, changeType=change_type, entityUrn=entity_urn, - aspectName=aspect_name, aspect=aspect, ) @@ -176,9 +171,7 @@ def extract_dataset_schema( ) -> List[MetadataChangeProposalWrapper]: schema_metadata = self.to_datahub_schema(table) schema_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.SCHEMA_METADATA, aspect=schema_metadata, ) return [schema_mcp] @@ -409,9 +402,7 @@ def to_datahub_dataset( viewLanguage="m_query", ) view_prop_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.VIEW_PROPERTIES, aspect=view_properties, ) dataset_mcps.extend([view_prop_mcp]) @@ -425,30 +416,23 @@ def to_datahub_dataset( ) info_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.DATASET_PROPERTIES, aspect=ds_properties, ) # Remove status mcp status_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) 
if self.__config.extract_dataset_schema: dataset_mcps.extend(self.extract_dataset_schema(table, ds_urn)) subtype_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.SUBTYPES, aspect=SubTypesClass( typeNames=[ - DatasetSubTypes.POWERBI_DATASET_TABLE, - DatasetSubTypes.VIEW, + BIContainerSubTypes.POWERBI_DATASET_TABLE, ] ), ) @@ -464,9 +448,7 @@ def to_datahub_dataset( # Dashboard owner MCP ownership = OwnershipClass(owners=[owner_class]) owner_mcp = self.new_mcp( - entity_type=Constant.DATASET, entity_urn=ds_urn, - aspect_name=Constant.OWNERSHIP, aspect=ownership, ) dataset_mcps.extend([owner_mcp]) @@ -606,17 +588,13 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict: ) info_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.CHART_INFO, aspect=chart_info_instance, ) # removed status mcp status_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) @@ -633,18 +611,14 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict: # Explicitly emitting this aspect isn't necessary, but we do it here to ensure that # the old, bad data gets overwritten. 
chart_key_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.CHART_KEY, aspect=ChartUrn.from_string(chart_urn).to_key_aspect(), ) # Browse path browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"]) browse_path_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) result_mcps = [ @@ -710,17 +684,13 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: ) info_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_INFO, aspect=dashboard_info_cls, ) # removed status mcp removed_status_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) @@ -732,9 +702,7 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: # Dashboard key dashboard_key_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_KEY, aspect=dashboard_key_cls, ) @@ -750,9 +718,7 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: # Dashboard owner MCP ownership = OwnershipClass(owners=owners) owner_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.OWNERSHIP, aspect=ownership, ) @@ -761,9 +727,7 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict: paths=[f"/{Constant.PLATFORM_NAME}/{dashboard.workspace_name}"] ) browse_path_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) @@ -827,7 +791,7 @@ def generate_container_for_workspace( container_work_units = gen_containers( container_key=self.workspace_key, name=workspace.name, - sub_types=[BIContainerSubTypes.POWERBI_WORKSPACE], + sub_types=[workspace.type], ) return 
container_work_units @@ -858,9 +822,7 @@ def append_tag_mcp( ) -> None: if self.__config.extract_endorsements_to_tags and tags: tags_mcp = self.new_mcp( - entity_type=entity_type, entity_urn=entity_urn, - aspect_name=Constant.GLOBAL_TAGS, aspect=self.transform_tags(tags), ) list_of_mcps.append(tags_mcp) @@ -883,9 +845,7 @@ def to_datahub_user( user_key = CorpUserKeyClass(username=user.id) user_key_mcp = self.new_mcp( - entity_type=Constant.CORP_USER, entity_urn=user_urn, - aspect_name=Constant.CORP_USER_KEY, aspect=user_key, ) @@ -1028,17 +988,13 @@ def to_chart_mcps( ) info_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.CHART_INFO, aspect=chart_info_instance, ) # removed status mcp status_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) # Subtype mcp @@ -1052,9 +1008,7 @@ def to_chart_mcps( # Browse path browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"]) browse_path_mcp = self.new_mcp( - entity_type=Constant.CHART, entity_urn=chart_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) list_of_mcps = [info_mcp, status_mcp, subtype_mcp, browse_path_mcp] @@ -1105,17 +1059,13 @@ def report_to_dashboard( ) info_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_INFO, aspect=dashboard_info_cls, ) # removed status mcp removed_status_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) @@ -1127,9 +1077,7 @@ def report_to_dashboard( # Dashboard key dashboard_key_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.DASHBOARD_KEY, aspect=dashboard_key_cls, ) # Report Ownership @@ -1144,9 +1092,7 @@ def report_to_dashboard( # Report owner MCP ownership = OwnershipClass(owners=owners) owner_mcp = self.new_mcp( 
- entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.OWNERSHIP, aspect=ownership, ) @@ -1155,17 +1101,13 @@ def report_to_dashboard( paths=[f"/{Constant.PLATFORM_NAME}/{workspace.name}"] ) browse_path_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=Constant.BROWSERPATH, aspect=browse_path, ) sub_type_mcp = self.new_mcp( - entity_type=Constant.DASHBOARD, entity_urn=dashboard_urn, - aspect_name=SubTypesClass.ASPECT_NAME, - aspect=SubTypesClass(typeNames=[Constant.REPORT_TYPE_NAME]), + aspect=SubTypesClass(typeNames=[report.type.value]), ) list_of_mcps = [ @@ -1203,7 +1145,7 @@ def report_to_datahub_work_units( logger.debug(f"Converting report={report.name} to datahub dashboard") # Convert user to CorpUser user_mcps = self.to_datahub_users(report.users) - # Convert pages to charts. A report has single dataset and same dataset used in pages to create visualization + # Convert pages to charts. A report has a single dataset and the same dataset used in pages to create visualization ds_mcps = self.to_datahub_dataset(report.dataset, workspace) chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps) @@ -1267,7 +1209,10 @@ def __init__(self, config: PowerBiDashboardSourceConfig, ctx: PipelineContext): self.source_config ) try: - self.powerbi_client = PowerBiAPI(self.source_config) + self.powerbi_client = PowerBiAPI( + config=self.source_config, + reporter=self.reporter, + ) except Exception as e: logger.warning(e) exit( @@ -1288,7 +1233,10 @@ def __init__(self, config: PowerBiDashboardSourceConfig, ctx: PipelineContext): def test_connection(config_dict: dict) -> TestConnectionReport: test_report = TestConnectionReport() try: - PowerBiAPI(PowerBiDashboardSourceConfig.parse_obj_allow_extras(config_dict)) + PowerBiAPI( + PowerBiDashboardSourceConfig.parse_obj_allow_extras(config_dict), + PowerBiDashboardSourceReport(), + ) test_report.basic_connectivity = CapabilityReport(capable=True) except 
Exception as e: test_report.basic_connectivity = CapabilityReport( @@ -1308,6 +1256,7 @@ def get_allowed_workspaces(self) -> List[powerbi_data_classes.Workspace]: workspace for workspace in all_workspaces if self.source_config.workspace_id_pattern.allowed(workspace.id) + and workspace.type in self.source_config.workspace_type_filter ] logger.info(f"Number of workspaces = {len(all_workspaces)}") @@ -1366,8 +1315,9 @@ def get_workspace_workunit( ) for workunit in workspace_workunits: - # Return workunit to Datahub Ingestion framework + # Return workunit to a Datahub Ingestion framework yield workunit + for dashboard in workspace.dashboards: try: # Fetch PowerBi users for dashboards diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 5106b9817d351..fb0959ac604c4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -41,6 +41,7 @@ class DatasetKey(ContainerKey): class Workspace: id: str name: str + type: str # This is used as a subtype of the Container entity. 
dashboards: List["Dashboard"] reports: List["Report"] datasets: Dict[str, "PowerBIDataset"] @@ -211,10 +212,16 @@ def __hash__(self): return hash(self.__members()) +class ReportType(Enum): + PaginatedReport = "PaginatedReport" + PowerBIReport = "Report" + + @dataclass class Report: id: str name: str + type: ReportType webUrl: Optional[str] embedUrl: str description: str @@ -259,7 +266,7 @@ class Dashboard: tiles: List["Tile"] users: List["User"] tags: List[str] - webUrl: Optional[str] = None + webUrl: Optional[str] def get_urn_part(self): return f"dashboards.{self.id}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index b190cf065b6e3..d89b9662d12ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -1,9 +1,8 @@ import logging -import math from abc import ABC, abstractmethod from datetime import datetime, timedelta from time import sleep -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Iterator, List, Optional, Union import msal import requests @@ -21,6 +20,7 @@ Page, PowerBIDataset, Report, + ReportType, Table, Tile, User, @@ -57,7 +57,8 @@ def is_http_failure(response: Response, message: str) -> bool: class DataResolverBase(ABC): SCOPE: str = "https://analysis.windows.net/powerbi/api/.default" - BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/groups" + MY_ORG_URL = "https://api.powerbi.com/v1.0/myorg" + BASE_URL: str = f"{MY_ORG_URL}/groups" ADMIN_BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/admin" AUTHORITY: str = "https://login.microsoftonline.com/" TOP: int = 1000 @@ -222,49 +223,27 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: tags=[], ) for instance in dashboards_dict - if instance is not None + 
if ( + instance is not None + and Constant.APP_ID + not in instance # As we add dashboards to the App, Power BI starts + # providing duplicate dashboard information, + # where the duplicate includes an AppId, while the original dashboard does not. + ) ] return dashboards - def get_groups(self) -> List[dict]: + def get_groups(self, filter_: Dict) -> List[dict]: group_endpoint = self.get_groups_endpoint() - params: dict = {"$top": self.TOP, "$skip": 0, "$filter": "type eq 'Workspace'"} - - def fetch_page(page_number: int) -> dict: - params["$skip"] = self.TOP * page_number - logger.debug(f"Query parameters = {params}") - response = self._request_session.get( - group_endpoint, - headers=self.get_authorization_header(), - params=params, - ) - response.raise_for_status() - return response.json() - # Hit PowerBi - logger.debug(f"Request to groups endpoint URL={group_endpoint}") - zeroth_page = fetch_page(0) - logger.debug(f"Page 0 = {zeroth_page}") - if zeroth_page.get(Constant.ODATA_COUNT) is None: - logger.warning( - "@odata.count field is not present in response. Unable to fetch workspaces." 
- ) - return [] + output: List[dict] = [] - number_of_items = zeroth_page[Constant.ODATA_COUNT] - number_of_pages = math.ceil(number_of_items / self.TOP) - output: List[dict] = zeroth_page[Constant.VALUE] - for page in range( - 1, number_of_pages - ): # start from 1 as 0th index already fetched - page_response = fetch_page(page) - if len(page_response[Constant.VALUE]) == 0: - break - - logger.debug(f"Page {page} = {zeroth_page}") - - output.extend(page_response[Constant.VALUE]) + for page in self.itr_pages( + endpoint=group_endpoint, + parameter_override=filter_, + ): + output.extend(page) return output @@ -286,13 +265,14 @@ def fetch_reports(): ) response.raise_for_status() response_dict = response.json() - logger.debug(f"Request response = {response_dict}") + logger.debug(f"Report Request response = {response_dict}") return response_dict.get(Constant.VALUE, []) reports: List[Report] = [ Report( id=raw_instance.get(Constant.ID), name=raw_instance.get(Constant.NAME), + type=ReportType[raw_instance.get(Constant.REPORT_TYPE)], webUrl=raw_instance.get(Constant.WEB_URL), embedUrl=raw_instance.get(Constant.EMBED_URL), description=raw_instance.get(Constant.DESCRIPTION, ""), @@ -304,6 +284,11 @@ def fetch_reports(): dataset=workspace.datasets.get(raw_instance.get(Constant.DATASET_ID)), ) for raw_instance in fetch_reports() + if Constant.APP_ID + not in raw_instance # As we add reports to the App, Power BI starts providing + # duplicate report information, + # where the duplicate includes an AppId, + # while the original report does not. 
] return reports @@ -395,6 +380,40 @@ def new_dataset_or_report(tile_instance: Any) -> dict: return tiles + def itr_pages( + self, + endpoint: str, + parameter_override: Dict = {}, + ) -> Iterator[List[Dict]]: + params: dict = { + "$skip": 0, + "$top": self.TOP, + **parameter_override, + } + + page_number: int = 0 + + while True: + params["$skip"] = self.TOP * page_number + response = self._request_session.get( + endpoint, + headers=self.get_authorization_header(), + params=params, + ) + + response.raise_for_status() + + assert ( + Constant.VALUE in response.json() + ), "'value' key is not present in paginated response" + + if not response.json()[Constant.VALUE]: # if it is an empty list then break + break + + yield response.json()[Constant.VALUE] + + page_number += 1 + class RegularAPIResolver(DataResolverBase): # Regular access endpoints @@ -407,6 +426,7 @@ class RegularAPIResolver(DataResolverBase): Constant.REPORT_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports", Constant.PAGE_BY_REPORT: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports/{REPORT_ID}/pages", Constant.DATASET_EXECUTE_QUERIES: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/datasets/{DATASET_ID}/executeQueries", + Constant.GET_WORKSPACE_APP: "{MY_ORG_URL}/apps/{APP_ID}", } def get_dataset( @@ -676,6 +696,7 @@ class AdminAPIResolver(DataResolverBase): Constant.ENTITY_USER_LIST: "{POWERBI_ADMIN_BASE_URL}/{ENTITY}/{ENTITY_ID}/users", Constant.DATASET_LIST: "{POWERBI_ADMIN_BASE_URL}/groups/{WORKSPACE_ID}/datasets", Constant.WORKSPACE_MODIFIED_LIST: "{POWERBI_ADMIN_BASE_URL}/workspaces/modified", + Constant.GET_WORKSPACE_APP: "{POWERBI_ADMIN_BASE_URL}/apps", } def create_scan_job(self, workspace_ids: List[str]) -> str: @@ -922,7 +943,7 @@ def _get_pages_by_report(self, workspace: Workspace, report_id: str) -> List[Pag def get_modified_workspaces(self, modified_since: str) -> List[str]: """ - Get list of modified workspaces + Get a list of modified workspaces """ modified_workspaces_endpoint = self.API_ENDPOINTS[ 
Constant.WORKSPACE_MODIFIED_LIST @@ -930,7 +951,7 @@ def get_modified_workspaces(self, modified_since: str) -> List[str]: POWERBI_ADMIN_BASE_URL=DataResolverBase.ADMIN_BASE_URL, ) parameters: Dict[str, Any] = { - "excludePersonalWorkspaces": True, + "excludePersonalWorkspaces": False, "excludeInActiveWorkspaces": True, "modifiedSince": modified_since, } diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py index a245d4c2b9a35..25e97b158d48b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py @@ -32,8 +32,13 @@ class PowerBiAPI: - def __init__(self, config: PowerBiDashboardSourceConfig) -> None: + def __init__( + self, + config: PowerBiDashboardSourceConfig, + reporter: PowerBiDashboardSourceReport, + ) -> None: self.__config: PowerBiDashboardSourceConfig = config + self.__reporter = reporter self.__regular_api_resolver = RegularAPIResolver( client_id=self.__config.client_id, @@ -182,17 +187,27 @@ def fill_tags() -> None: fill_ownership() fill_tags() - return reports def get_workspaces(self) -> List[Workspace]: + modified_workspace_ids: List[str] = [] + if self.__config.modified_since: - workspaces = self.get_modified_workspaces() - return workspaces + modified_workspace_ids = self.get_modified_workspaces() groups: List[dict] = [] + filter_: Dict[str, str] = {} try: - groups = self._get_resolver().get_groups() + if modified_workspace_ids: + id_filter: List[str] = [] + + for id_ in modified_workspace_ids: + id_filter.append(f"id eq {id_}") + + filter_["$filter"] = " or ".join(id_filter) + + groups = self._get_resolver().get_groups(filter_=filter_) + except: self.log_http_error(message="Unable to fetch list of workspaces") raise # we want this exception to bubble up @@ -201,6 +216,7 @@ def 
get_workspaces(self) -> List[Workspace]: Workspace( id=workspace[Constant.ID], name=workspace[Constant.NAME], + type=workspace[Constant.TYPE], datasets={}, dashboards=[], reports=[], @@ -213,34 +229,20 @@ def get_workspaces(self) -> List[Workspace]: ] return workspaces - def get_modified_workspaces(self) -> List[Workspace]: - workspaces: List[Workspace] = [] + def get_modified_workspaces(self) -> List[str]: + modified_workspace_ids: List[str] = [] if self.__config.modified_since is None: - return workspaces + return modified_workspace_ids try: modified_workspace_ids = self.__admin_api_resolver.get_modified_workspaces( self.__config.modified_since ) - workspaces = [ - Workspace( - id=workspace_id, - name="", - datasets={}, - dashboards=[], - reports=[], - report_endorsements={}, - dashboard_endorsements={}, - scan_result={}, - independent_datasets=[], - ) - for workspace_id in modified_workspace_ids - ] except: self.log_http_error(message="Unable to fetch list of modified workspaces.") - return workspaces + return modified_workspace_ids def _get_scan_result(self, workspace_ids: List[str]) -> Any: scan_id: Optional[str] = None @@ -389,9 +391,28 @@ def _fill_metadata_from_scan_result( workspaces = [] for workspace_metadata in scan_result["workspaces"]: + if ( + workspace_metadata.get(Constant.STATE) != Constant.ACTIVE + or workspace_metadata.get(Constant.TYPE) + not in self.__config.workspace_type_filter + ): + # if the state is not "Active" then in some state like Not Found, "name" attribute is not present + wrk_identifier: str = ( + workspace_metadata[Constant.NAME] + if workspace_metadata.get(Constant.NAME) + else workspace_metadata.get(Constant.ID) + ) + self.__reporter.info( + title="Skipped Workspace", + message="Workspace was skipped due to the workspace_type_filter", + context=f"workspace={wrk_identifier}", + ) + continue + cur_workspace = Workspace( - id=workspace_metadata["id"], - name=workspace_metadata["name"], + id=workspace_metadata[Constant.ID], + 
name=workspace_metadata[Constant.NAME], + type=workspace_metadata[Constant.TYPE], datasets={}, dashboards=[], reports=[], @@ -403,7 +424,7 @@ def _fill_metadata_from_scan_result( cur_workspace.scan_result = workspace_metadata cur_workspace.datasets = self._get_workspace_datasets(cur_workspace) - # Fetch endorsements tag if it is enabled from configuration + # Fetch endorsement tag if it is enabled from configuration if self.__config.extract_endorsements_to_tags: cur_workspace.dashboard_endorsements = self._get_dashboard_endorsements( cur_workspace.scan_result diff --git a/metadata-ingestion/src/datahub/ingestion/source/preset.py b/metadata-ingestion/src/datahub/ingestion/source/preset.py index e51520898103d..6f53223e000f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/preset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/preset.py @@ -56,7 +56,7 @@ class PresetConfig(SupersetConfig): def remove_trailing_slash(cls, v): return config_clean.remove_trailing_slashes(v) - @root_validator + @root_validator(skip_on_failure=True) def default_display_uri_to_connect_uri(cls, values): base = values.get("display_uri") if base is None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py index 4b7f710beed08..4df64c80bad8a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py @@ -334,19 +334,26 @@ def _process_view_lineage(self, lineage_row: LineageRow) -> None: ) def _process_copy_command(self, lineage_row: LineageRow) -> None: - source = self._lineage_v1._get_sources( + logger.debug(f"Processing COPY command for lineage row: {lineage_row}") + sources = self._lineage_v1._get_sources( lineage_type=LineageCollectorType.COPY, db_name=self.database, source_schema=None, source_table=None, ddl=None, filename=lineage_row.filename, - )[0] + ) + 
logger.debug(f"Recognized sources: {sources}") + source = sources[0] if not source: + logger.debug("Ignoring command since couldn't recognize proper source") return s3_urn = source[0].urn - + logger.debug(f"Recognized s3 dataset urn: {s3_urn}") if not lineage_row.target_schema or not lineage_row.target_table: + logger.debug( + f"Didn't find target schema (found: {lineage_row.target_schema}) or target table (found: {lineage_row.target_table})" + ) return target = self._make_filtered_target(lineage_row) if not target: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index affbcd00b5107..39370b93b561c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -283,6 +283,34 @@ def alter_table_rename_query( AND SYS.query_text ILIKE '%alter table % rename to %' """ + @staticmethod + def list_copy_commands_sql( + db_name: str, start_time: datetime, end_time: datetime + ) -> str: + return """ + select + distinct + "schema" as target_schema, + "table" as target_table, + c.file_name as filename + from + SYS_QUERY_DETAIL as si + join SYS_LOAD_DETAIL as c on + si.query_id = c.query_id + join SVV_TABLE_INFO sti on + sti.table_id = si.table_id + where + database = '{db_name}' + and si.start_time >= '{start_time}' + and si.start_time < '{end_time}' + order by target_schema, target_table, si.start_time asc + """.format( + # We need the original database name for filtering + db_name=db_name, + start_time=start_time.strftime(redshift_datetime_format), + end_time=end_time.strftime(redshift_datetime_format), + ) + @staticmethod def additional_table_metadata_query() -> str: raise NotImplementedError @@ -317,12 +345,6 @@ def list_insert_create_queries_sql( ) -> str: raise NotImplementedError - @staticmethod - def list_copy_commands_sql( - db_name: str, start_time: datetime, end_time: datetime - ) 
-> str: - raise NotImplementedError - class RedshiftProvisionedQuery(RedshiftCommonQuery): @staticmethod @@ -536,34 +558,6 @@ def list_insert_create_queries_sql( end_time=end_time.strftime(redshift_datetime_format), ) - @staticmethod - def list_copy_commands_sql( - db_name: str, start_time: datetime, end_time: datetime - ) -> str: - return """ - select - distinct - "schema" as target_schema, - "table" as target_table, - filename - from - stl_insert as si - join stl_load_commits as c on - si.query = c.query - join SVV_TABLE_INFO sti on - sti.table_id = tbl - where - database = '{db_name}' - and si.starttime >= '{start_time}' - and si.starttime < '{end_time}' - order by target_schema, target_table, starttime asc - """.format( - # We need the original database name for filtering - db_name=db_name, - start_time=start_time.strftime(redshift_datetime_format), - end_time=end_time.strftime(redshift_datetime_format), - ) - @staticmethod def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: start_time_str: str = start_time.strftime(redshift_datetime_format) @@ -941,33 +935,6 @@ def list_insert_create_queries_sql( # when loading from s3 using prefix with a single file it produces 2 lines (for file and just directory) - also # behaves like this when run in the old way - @staticmethod - def list_copy_commands_sql( - db_name: str, start_time: datetime, end_time: datetime - ) -> str: - return """ - select - distinct - "schema" as target_schema, - "table" as target_table, - c.file_name - from - SYS_QUERY_DETAIL as si - join SYS_LOAD_DETAIL as c on - si.query_id = c.query_id - join SVV_TABLE_INFO sti on - sti.table_id = si.table_id - where - database = '{db_name}' - and si.start_time >= '{start_time}' - and si.start_time < '{end_time}' - order by target_schema, target_table, si.start_time asc - """.format( - # We need the original database name for filtering - db_name=db_name, - start_time=start_time.strftime(redshift_datetime_format), - 
end_time=end_time.strftime(redshift_datetime_format), - ) # handles "create table IF ..." statements wrong probably - "create command" field contains only "create table if" in such cases # also similar happens if for example table name contains special characters quoted with " i.e. "test-table1" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py index 8309c469f67c5..de0904107b9bb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py @@ -329,7 +329,9 @@ def get_resource_workunits( entityUrn=dashboard_urn, aspect=DashboardInfoClass( title=resource.name, - description=resource.description, + description=resource.description + if resource.description is not None + else "", lastModified=ChangeAuditStampsClass( created=AuditStampClass( time=round(resource.created_time.timestamp() * 1000), @@ -559,7 +561,14 @@ def get_sac_connection( retries = 3 backoff_factor = 10 - status_forcelist = (500,) + + # The Resources and Data Import Service APIs of SAP Analytics Cloud can be somewhat unstable, occasionally + # returning HTTP errors for some requests, even though the APIs are generally operational. Therefore, we must + # retry these requests to increase the likelihood that the ingestion is successful. For the same reason we + # should also retry requests that receive a 401 HTTP status; however, this status also legitimately indicates + # that the provided OAuth credentials are invalid or that the OAuth client does not have the correct + # permissions assigned, therefore requests that receive a 401 HTTP status must not be retried. 
+ status_forcelist = (400, 500, 503) retry = Retry( total=retries, @@ -611,7 +620,9 @@ def get_resources(self) -> Iterable[Resource]: entity: pyodata.v2.service.EntityProxy for entity in entities: resource_id: str = entity.resourceId - name: str = entity.name.strip() + name: str = ( + entity.name.strip() if entity.name is not None else entity.resourceId + ) if not self.config.resource_id_pattern.allowed( resource_id @@ -655,8 +666,12 @@ def get_resources(self) -> Iterable[Resource]: ResourceModel( namespace=namespace, model_id=model_id, - name=nav_entity.name.strip(), - description=nav_entity.description.strip(), + name=nav_entity.name.strip() + if nav_entity.name is not None + else f"{namespace}:{model_id}", + description=nav_entity.description.strip() + if nav_entity.description is not None + else None, system_type=nav_entity.systemType, # BW or HANA connection_id=nav_entity.connectionId, external_id=nav_entity.externalId, # query:[][][query] or view:[schema][schema.namespace][view] @@ -678,7 +693,9 @@ def get_resources(self) -> Iterable[Resource]: resource_subtype=entity.resourceSubtype, story_id=entity.storyId, name=name, - description=entity.description.strip(), + description=entity.description.strip() + if entity.description is not None + else None, created_time=entity.createdTime, created_by=created_by, modified_time=entity.modifiedTime, @@ -715,7 +732,11 @@ def get_import_data_model_columns( columns.append( ImportDataModelColumn( name=column["columnName"].strip(), - description=column["descriptionName"].strip(), + description=( + column["descriptionName"].strip() + if column.get("descriptionName") is not None + else None + ), property_type=column["propertyType"], data_type=column["columnDataType"], max_length=column.get("maxLength"), diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py index 457fda1e06181..2c02b444cea1c 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py @@ -8,7 +8,7 @@ class ResourceModel: namespace: str model_id: str name: str - description: str + description: Optional[str] system_type: Optional[str] connection_id: Optional[str] external_id: Optional[str] @@ -22,7 +22,7 @@ class Resource: resource_subtype: str story_id: str name: str - description: str + description: Optional[str] created_time: datetime created_by: Optional[str] modified_time: datetime @@ -36,7 +36,7 @@ class Resource: @dataclass(frozen=True) class ImportDataModelColumn: name: str - description: str + description: Optional[str] property_type: str data_type: str max_length: Optional[int] diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 858281f880359..4e40407fba908 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -243,6 +243,8 @@ def get_platform_from_database_id(self, database_id): return "athena" if platform_name == "clickhousedb": return "clickhouse" + if platform_name == "postgresql": + return "postgres" return platform_name @lru_cache(maxsize=None) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index bd131bc9eaae4..bd91f5be42e7d 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -645,7 +645,9 @@ def add_known_lineage_mapping( upstream_urn: The upstream dataset URN. downstream_urn: The downstream dataset URN. 
""" - + logger.debug( + f"Adding lineage to the map, downstream: {downstream_urn}, upstream: {upstream_urn}" + ) self.report.num_known_mapping_lineage += 1 # We generate a fake "query" object to hold the lineage. diff --git a/metadata-ingestion/tests/integration/iceberg/docker-compose.yml b/metadata-ingestion/tests/integration/iceberg/docker-compose.yml index 8baae6e8ab636..8a05ac7481fe2 100644 --- a/metadata-ingestion/tests/integration/iceberg/docker-compose.yml +++ b/metadata-ingestion/tests/integration/iceberg/docker-compose.yml @@ -1,5 +1,3 @@ -version: "3" - services: spark-iceberg: image: tabulario/spark-iceberg:3.3.2_1.3.0 diff --git a/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json b/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json index 3321fcac0d73e..4b2afb29ddda8 100644 --- a/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json +++ b/metadata-ingestion/tests/integration/iceberg/iceberg_deleted_table_mces_golden.json @@ -11,6 +11,7 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "name": "another_taxis", "customProperties": { "owner": "root", "created-at": "2024-06-27T17:29:32.492204247Z", diff --git a/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json b/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json index b017b6cd31520..477f719ef9317 100644 --- a/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json +++ b/metadata-ingestion/tests/integration/iceberg/iceberg_ingest_mces_golden.json @@ -11,6 +11,7 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "name": "taxis", "customProperties": { "owner": "root", "created-at": "2024-05-22T14:08:04.001538500Z", diff --git a/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json b/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json index 
453a79494fa25..6d2ca013d81d0 100644 --- a/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json +++ b/metadata-ingestion/tests/integration/iceberg/iceberg_profile_mces_golden.json @@ -11,6 +11,7 @@ }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "name": "taxis", "customProperties": { "owner": "root", "created-at": "2024-05-22T14:10:22.926080700Z", diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json index fa4bcb8abaa94..5cfa4ec80c643 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -338,8 +335,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -446,8 +442,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -546,8 +541,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -646,8 +640,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -746,8 +739,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -846,8 +838,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1118,11 +1109,12 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": 
"powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -1135,12 +1127,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -1416,8 +1407,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1491,8 +1481,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1591,8 +1580,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1691,8 +1679,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1799,8 +1786,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1899,8 +1885,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1994,14 +1979,19 @@ "json": [ { "op": "add", - "path": "/dashboardUrl", - "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + "path": "/title", + "value": "SalesMarketing" }, { "op": "add", "path": "/description", "value": "Acryl sales marketing report" }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + }, { "op": "add", "path": "/lastModified", @@ -2015,11 +2005,6 @@ "actor": "urn:li:corpuser:unknown" } } - }, - { - "op": "add", - "path": "/title", - "value": "SalesMarketing" } ] }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json 
b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json index 60b36897ed2e4..66ee60c2eebb3 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -364,8 +361,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -464,8 +460,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -596,8 +591,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -696,8 +690,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -796,8 +789,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -896,8 +888,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1465,8 +1456,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json index b43e4a6c2c1c2..e8be3aa9c0ac7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json @@ -122,15 +122,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - 
"aspectName": "viewProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "materialized": false, - "viewLogic": "dummy", - "viewLanguage": "m_query" + "platform": "urn:li:dataPlatform:powerbi" } }, "systemMetadata": { @@ -140,19 +138,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "public issue_history", - "description": "Library dataset description", - "tags": [] + "typeNames": [ + "Semantic Model" + ] } }, "systemMetadata": { @@ -165,10 +159,10 @@ "entityType": "container", "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:powerbi" + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" } }, "systemMetadata": { @@ -181,11 +175,14 @@ "entityType": "container", "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset" + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } ] } }, @@ -196,18 +193,15 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "viewProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", - "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" - } - ] + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -217,13 +211,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", "aspect": { "json": { - "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -256,8 +256,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -372,8 +371,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -488,8 +486,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -604,8 +601,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -720,8 +716,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -836,8 +831,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -952,8 +946,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ 
-1041,15 +1034,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "materialized": false, - "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", - "viewLanguage": "m_query" + "platform": "urn:li:dataPlatform:powerbi" } }, "systemMetadata": { @@ -1059,19 +1050,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", - "name": "dbo_book_issue", - "description": "hr pbi test description", - "tags": [] + "typeNames": [ + "Semantic Model" + ] } }, "systemMetadata": { @@ -1084,10 +1071,10 @@ "entityType": "container", "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:powerbi" + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" } }, "systemMetadata": { @@ -1100,11 +1087,14 @@ "entityType": "container", "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - 
"PowerBI Dataset" + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } ] } }, @@ -1115,18 +1105,15 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "viewProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", - "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" - } - ] + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" } }, "systemMetadata": { @@ -1136,13 +1123,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", "aspect": { "json": { - "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] } }, "systemMetadata": { @@ -1175,8 +1168,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1291,8 +1283,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1870,15 +1861,17 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "containerProperties", "aspect": { "json": { - "materialized": false, - "viewLogic": "dummy", - "viewLanguage": "m_query" + "customProperties": { + "platform": "powerbi", + "dataset": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "name": "library-dataset" } }, "systemMetadata": { @@ -1888,19 +1881,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" - }, - "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "public issue_history", - "description": "Library dataset description", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -1910,13 +1897,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:powerbi" } }, "systemMetadata": { @@ -1926,15 +1913,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", "changeType": 
"UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Semantic Model" ] } }, @@ -1944,15 +1930,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLogic": "dummy", "viewLanguage": "m_query" } }, @@ -1964,7 +1966,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -1973,7 +1975,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "SNOWFLAKE_TESTTABLE", + "name": "public issue_history", "description": 
"Library dataset description", "tags": [] } @@ -1986,7 +1988,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2002,14 +2004,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2021,13 +2022,29 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) 
*#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", "viewLanguage": "m_query" } }, @@ -2039,7 +2056,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -2048,7 +2065,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "snowflake native-query", + "name": "SNOWFLAKE_TESTTABLE", "description": "Library dataset description", "tags": [] } @@ -2061,7 +2078,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2077,14 +2094,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2096,13 +2112,29 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = 
#\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", "viewLanguage": "m_query" } }, @@ -2114,7 +2146,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -2123,7 +2155,7 @@ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" }, "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", - "name": "big-query-with-parameter", + "name": "snowflake native-query", "description": "Library dataset description", "tags": [] } @@ -2136,7 +2168,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2152,14 +2184,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2169,6 +2200,112 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + 
"aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -2233,8 +2370,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2244,6 +2380,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -2308,8 +2460,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2319,6 +2470,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + 
"json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", @@ -2383,8 +2550,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -2394,6 +2560,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -2495,6 +2677,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Page" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", @@ -2619,6 +2819,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Page" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": 
"no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", @@ -2692,6 +2910,60 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/title", + "value": "SalesMarketing" + }, + { + "op": "add", + "path": "/description", + "value": "Acryl sales marketing report" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", + "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)" + }, + { + "op": "add", + "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", + "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", @@ -2774,15 +3046,13 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", "changeType": "UPSERT", - "aspectName": "subTypes", + 
"aspectName": "container", "aspect": { "json": { - "typeNames": [ - "PowerBI Page" - ] + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" } }, "systemMetadata": { @@ -2795,10 +3065,411 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "powerbi", + "dataset": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "name": "library-dataset" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:powerbi" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Semantic Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + 
"json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + 
"customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "SNOWFLAKE_TESTTABLE", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = 
Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": 
"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = 
#\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] } }, "systemMetadata": { @@ -2808,17 +3479,30 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", - "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" - } + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + 
"aspect": { + "json": { + "typeNames": [ + "Table" ] } }, @@ -2829,8 +3513,64 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2845,8 +3585,82 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + 
"aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2861,13 +3675,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "subTypes", "aspect": { "json": { - "username": "User4@foo.com" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -2877,8 +3693,64 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = 
Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2893,14 +3765,14 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "PowerBI Page" + "Table" ] } }, @@ -2910,14 +3782,14 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User3@foo.com", +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "container", "aspect": { "json": { - "username": "User3@foo.com" + "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2" } 
}, "systemMetadata": { @@ -2927,13 +3799,15 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:users.User3@foo.com", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePaths", "aspect": { "json": { - "removed": false + "paths": [ + "/powerbi/demo-workspace" + ] } }, "systemMetadata": { @@ -2944,7 +3818,7 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.5b218778-e7a5-4d73-8187-f10824047715)", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", "changeType": "PATCH", "aspectName": "dashboardInfo", "aspect": { @@ -2952,27 +3826,17 @@ { "op": "add", "path": "/title", - "value": "SalesMarketing" + "value": "Printable SalesMarketing" }, { "op": "add", "path": "/description", "value": "Acryl sales marketing report" }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)", - "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection)" - }, - { - "op": "add", - "path": "/charts/urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", - "value": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)" - }, { "op": "add", "path": "/dashboardUrl", - "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715" + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163" }, { "op": "add", @@ -2997,8 +3861,8 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", "changeType": "UPSERT", "aspectName": "status", 
"aspect": { @@ -3012,6 +3876,78 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/584cf13a-1485-41c2-a514-b1bb66fff163" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PaginatedReport" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9", + "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", @@ -3036,10 +3972,10 @@ "entityType": "container", "entityUrn": 
"urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [] + "removed": false } }, "systemMetadata": { @@ -3083,15 +4019,13 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "entityType": "container", + "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "browsePathsV2", "aspect": { "json": { - "paths": [ - "/powerbi/second-demo-workspace" - ] + "path": [] } }, "systemMetadata": { @@ -3101,13 +4035,29 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User3@foo.com", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserKey", "aspect": { "json": { - "removed": false + "username": "User3@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User4@foo.com" } }, "systemMetadata": { @@ -3120,14 +4070,11 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "browsePaths", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", - "urn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" - } + "paths": [ + "/powerbi/second-demo-workspace" ] } }, @@ -3195,11 +4142,10 @@ "entityType": "dashboard", "entityUrn": 
"urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "status", "aspect": { "json": { - "dashboardTool": "powerbi", - "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE" + "removed": false } }, "systemMetadata": { @@ -3212,10 +4158,11 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dashboardKey", "aspect": { "json": { - "container": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE" } }, "systemMetadata": { @@ -3253,5 +4200,106 @@ "runId": "powerbi-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e", + "urn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User3@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User4@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index c5414444cc35b..665f5d5a3bb41 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - 
"PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json index e1ddbfb901bad..26476e61a0bd7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -158,8 +157,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -253,8 +251,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -348,8 +345,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -443,8 +439,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -538,8 +533,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -633,8 +627,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -728,8 +721,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -803,8 +795,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json 
b/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json index d204d426a38d3..0b822ad19b425 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_independent_datasets.json @@ -13,7 +13,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } }, { @@ -49,7 +51,8 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } }, { @@ -60,14 +63,14 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test" + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 6f899a7fa11b7..83f8f881835b7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 
+433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json index efbd9abfdb911..93a2c533d21ca 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest_patch_disabled.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index 
9a09cb4fec64d..eda831722cc91 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -338,8 +335,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -413,8 +409,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -521,8 +516,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -621,8 +615,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -721,8 +714,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -821,8 +813,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json index a4eb670a4b7f9..6f502cdfc0f5b 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -167,8 +167,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -186,8 +185,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -279,8 +277,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -336,8 +333,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - 
"View" + "Table" ] } }, @@ -409,8 +405,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -446,8 +441,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -668,8 +662,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -744,8 +737,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -819,8 +811,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json index 66e87952bf141..4393a87d1f570 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json @@ -182,33 +182,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)", - "type": "TRANSFORMED" - } + "Table" ] } }, @@ -354,12 +328,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", 
"changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -372,13 +355,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -391,18 +371,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -600,12 +573,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "upstreamLineage", "aspect": { "json": { - "removed": false + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)", + "type": "TRANSFORMED" + } + ] } }, "systemMetadata": { @@ -618,13 +600,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", 
"changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "PowerBI Dataset Table", - "View" - ] + "removed": false } }, "systemMetadata": { @@ -637,18 +616,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.analytics.analytics.sales_revenue,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -819,6 +791,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.revenue,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.analytics.analytics.sales_revenue,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", @@ -1395,7 +1392,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset" + "Semantic Model" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json new file mode 100644 index 0000000000000..f8c0fdc17c880 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_personal_ingest.json @@ -0,0 +1,329 @@ +[ +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "powerbi", + "workspace": "Jane Smith Workspace" + }, + "name": "Jane Smith Workspace" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:powerbi" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PersonalGroup" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User1@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/Jane Smith Workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/customProperties/chartCount", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/workspaceName", + "value": "Jane Smith Workspace" + }, + { + "op": "add", + "path": "/customProperties/workspaceId", + "value": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C" + }, + { + "op": "add", + "path": "/title", + "value": "test_dashboard" + }, + { + "op": "add", + "path": "/description", + "value": "Description of test dashboard" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://localhost/dashboards/web/1" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User2@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": 
"UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a", + "urn": "urn:li:container:4719aeafb92339db2b69194bcbe55c9a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json index ea1ee0df4b105..6da5f5781112e 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { 
"typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json b/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json index 580a8d1a1db11..b8963a0d7782d 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_profiling.json @@ -48,8 +48,8 @@ "json": { "timestampMillis": 1645599600000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 542300, "columnCount": 4, @@ -115,8 +115,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 094869bfd24f1..f6248db9008af 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } 
}, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -910,11 +901,12 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "chartKey", + "aspectName": "subTypes", "aspect": { "json": { - "dashboardTool": "powerbi", - "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" + "typeNames": [ + "PowerBI Tile" + ] } }, "systemMetadata": { @@ -927,12 +919,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "chartKey", "aspect": { "json": { - "typeNames": [ - "PowerBI Tile" - ] + "dashboardTool": "powerbi", + "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385" } }, "systemMetadata": { @@ -1213,8 +1204,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1288,8 +1278,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1363,8 +1352,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1438,8 +1426,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1513,8 +1500,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1588,8 +1574,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -1663,8 +1648,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ 
-1904,11 +1888,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "subTypes", "aspect": { "json": { - "paths": [ - "/powerbi/demo-workspace" + "typeNames": [ + "PowerBI Page" ] } }, @@ -1922,11 +1906,11 @@ "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,pages.5b218778-e7a5-4d73-8187-f10824047715.ReportSection1)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePaths", "aspect": { "json": { - "typeNames": [ - "PowerBI Page" + "paths": [ + "/powerbi/demo-workspace" ] } }, @@ -2129,6 +2113,657 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + 
"aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "SNOWFLAKE_TESTTABLE", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + 
"runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = 
Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + 
] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + 
"aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": 
"powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + 
"lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + 
"lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "PATCH", + "aspectName": "dashboardInfo", + "aspect": { + "json": [ + { + "op": "add", + "path": "/title", + "value": "Printable SalesMarketing" + }, + { + "op": "add", + "path": "/description", + "value": "Acryl sales marketing report" + }, + { + "op": "add", + "path": "/dashboardUrl", + "value": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163" + }, + { + "op": "add", + "path": "/lastModified", + "value": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } 
+ ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/584cf13a-1485-41c2-a514-b1bb66fff163" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PaginatedReport" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.584cf13a-1485-41c2-a514-b1bb66fff163)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "demo-workspace" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json index 
dcaa518a3c323..e327ca695beb7 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -213,8 +211,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -288,8 +285,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -363,8 +359,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +433,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -513,8 +507,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -588,8 +581,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -663,8 +655,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json b/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json index bc5e844f679c7..90c8ee5d0379e 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_server_to_platform_instance.json @@ -63,8 +63,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -138,8 +137,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -238,8 +236,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -338,8 +335,7 @@ "aspect": { "json": { 
"typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -438,8 +434,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -546,8 +541,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -646,8 +640,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -746,8 +739,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, @@ -846,8 +838,7 @@ "aspect": { "json": { "typeNames": [ - "PowerBI Dataset Table", - "View" + "Table" ] } }, diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py index 91c6082524389..b636c12cfda06 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py +++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py @@ -58,21 +58,28 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "status_code": 200, "json": admin_datasets_response, }, - "https://api.powerbi.com/v1.0/myorg/admin/groups": { + "https://api.powerbi.com/v1.0/myorg/admin/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "demo-workspace", "type": "Workspace", + "state": "Active", } ], }, }, + "https://api.powerbi.com/v1.0/myorg/admin/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/admin/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -220,6 +227,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None { "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", + "type": "Workspace", "state": "Active", "datasets": 
[ { @@ -391,6 +399,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", } @@ -422,6 +431,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", + "reportType": "PowerBIReport", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", @@ -436,6 +446,7 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", + "reportType": "PowerBIReport", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 23b23ecada0d4..43f77b059e41f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -19,6 +19,7 @@ from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import ( Page, Report, + ReportType, Workspace, ) from tests.test_helpers import mce_helpers, test_connection_helpers @@ -70,6 +71,9 @@ 
def scan_init_response(request, context): "64ED5CAD-7C10-4684-8180-826122881108||64ED5CAD-7C22-4684-8180-826122881108": { "id": "a674efd1-603c-4129-8d82-03cf2be05aff" }, + "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C": { + "id": "4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3" + }, } return w_id_vs_response[workspace_id] @@ -78,11 +82,10 @@ def scan_init_response(request, context): def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None: override_data = override_data or {} api_vs_response = { - "https://api.powerbi.com/v1.0/myorg/groups": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", @@ -105,6 +108,13 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -228,6 +238,11 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - ] }, }, + "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { + "method": "GET", + "status_code": 200, + "json": {"value": []}, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/tiles": { "method": "GET", "status_code": 200, @@ -318,6 +333,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", "state": "Active", + "type": "Workspace", "datasets": [ { "id": "05169CD2-E713-41E6-9600-1D8066D95445", @@ -473,6 +489,7 @@ def register_mock_api(request_mock: Any, 
override_data: Optional[dict] = None) - { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PaginatedReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", } @@ -489,6 +506,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - { "id": "64ED5CAD-7C22-4684-8180-826122881108", "name": "second-demo-workspace", + "type": "Workspace", "state": "Active", "datasets": [ { @@ -515,9 +533,17 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", - } + }, + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "SalesMarketing", + "description": "Acryl sales marketing report", + }, ], }, ] @@ -536,11 +562,21 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", - } + }, + { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "Printable SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", + "embedUrl": 
"https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", + }, ] }, }, @@ -550,12 +586,26 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - "json": { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163": { + "method": "GET", + "status_code": 200, + "json": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "id": "584cf13a-1485-41c2-a514-b1bb66fff163", + "reportType": "PaginatedReport", + "name": "Printable SalesMarketing", + "description": "Acryl sales marketing report", + "webUrl": "https://app.powerbi.com/groups/f089354e-8366-4e18-aea3-4cb4a3a50b48/reports/584cf13a-1485-41c2-a514-b1bb66fff163", + "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=584cf13a-1485-41c2-a514-b1bb66fff163&groupId=f089354e-8366-4e18-aea3-4cb4a3a50b48", + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715/pages": { "method": "GET", "status_code": 200, @@ -574,6 +624,11 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - ] }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/584cf13a-1485-41c2-a514-b1bb66fff163/pages": { + "method": "GET", + "status_code": 400, # Pages API is not supported for PaginatedReport + "text": '{"error":{"code":"InvalidRequest","message":"Request 
is currently not supported for RDL reports"}}', + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": { "method": "GET", "status_code": 200, @@ -612,7 +667,8 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) - request_mock.register_uri( api_vs_response[url]["method"], url, - json=api_vs_response[url]["json"], + json=api_vs_response[url].get("json"), + text=api_vs_response[url].get("text"), status_code=api_vs_response[url]["status_code"], ) @@ -683,6 +739,131 @@ def test_powerbi_ingest( ) +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +@pytest.mark.integration +def test_powerbi_workspace_type_filter( + mock_msal: MagicMock, + pytestconfig: pytest.Config, + tmp_path: str, + mock_time: datetime.datetime, + requests_mock: Any, +) -> None: + enable_logging() + + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" + + register_mock_api( + request_mock=requests_mock, + override_data={ + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", + "isReadOnly": True, + "name": "Jane Smith Workspace", + "type": "PersonalGroup", + "state": "Active", + }, + { + "id": "C6B5DBBC-7580-406C-A6BE-72628C28801C", + "isReadOnly": True, + "name": "Sales", + "type": "Workspace", + "state": "Active", + }, + ], + }, + }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { + "method": "GET", + "status_code": 200, + "json": { + "workspaces": [ + { + "id": "90E9E256-3D6D-4D38-86C8-6CCCBD8C170C", + "name": "Jane Smith Workspace", + "type": "PersonalGroup", + 
"state": "Active", + "datasets": [], + }, + ] + }, + }, + "https://api.powerbi.com/v1.0/myorg/groups/90E9E256-3D6D-4D38-86C8-6CCCBD8C170C/dashboards": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "id": "7D668CAD-7FFC-4505-9215-655BCA5BEBAE", + "isReadOnly": True, + "displayName": "test_dashboard", + "description": "Description of test dashboard", + "embedUrl": "https://localhost/dashboards/embed/1", + "webUrl": "https://localhost/dashboards/web/1", + } + ] + }, + }, + "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/4278EDC0-85AA-4BF2-B96A-2BC6C82B73C3": { + "method": "GET", + "status_code": 200, + "json": { + "status": "SUCCEEDED", + }, + }, + }, + ) + + default_config: dict = default_source_config() + + del default_config["workspace_id"] + del default_config["workspace_id_pattern"] + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_config, + "extract_workspaces_to_containers": True, + "workspace_type_filter": [ + "PersonalGroup", + ], + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "golden_test_personal_ingest.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{tmp_path}/powerbi_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) @pytest.mark.integration @@ -1439,6 +1620,7 @@ def validate_pipeline(pipeline: Pipeline) -> None: mock_workspace: Workspace = Workspace( id="64ED5CAD-7C10-4684-8180-826122881108", name="demo-workspace", + type="Workspace", datasets={}, dashboards=[], reports=[], @@ -1485,6 +1667,7 @@ def validate_pipeline(pipeline: Pipeline) -> None: Report( id=report[Constant.ID], name=report[Constant.NAME], + type=ReportType.PowerBIReport, webUrl="", 
embedUrl="", description=report[Constant.DESCRIPTION], @@ -1538,6 +1721,7 @@ def test_reports_with_failed_page_request( { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", + "reportType": "PowerBIReport", "name": "SalesMarketing", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715", @@ -1546,6 +1730,7 @@ def test_reports_with_failed_page_request( { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", + "reportType": "PaginatedReport", "name": "Product", "description": "Acryl product report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97", @@ -1561,6 +1746,7 @@ def test_reports_with_failed_page_request( "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "5b218778-e7a5-4d73-8187-f10824047715", "name": "SalesMarketing", + "reportType": "PowerBIReport", "description": "Acryl sales marketing report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/5b218778-e7a5-4d73-8187-f10824047715", "embedUrl": "https://app.powerbi.com/reportEmbed?reportId=5b218778-e7a5-4d73-8187-f10824047715&groupId=64ED5CAD-7C10-4684-8180-826122881108", @@ -1572,6 +1758,7 @@ def test_reports_with_failed_page_request( "json": { "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", "id": "e9fd6b0b-d8c8-4265-8c44-67e183aebf97", + "reportType": "PowerBIReport", "name": "Product", "description": "Acryl product report", "webUrl": "https://app.powerbi.com/groups/64ED5CAD-7C10-4684-8180-826122881108/reports/e9fd6b0b-d8c8-4265-8c44-67e183aebf97", @@ -1647,11 +1834,10 @@ def test_independent_datasets_extraction( register_mock_api( request_mock=requests_mock, override_data={ - "https://api.powerbi.com/v1.0/myorg/groups": { + 
"https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", @@ -1662,6 +1848,13 @@ def test_independent_datasets_extraction( ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/4674efd1-603c-4129-8d82-03cf2be05aff": { "method": "GET", "status_code": 200, @@ -1670,6 +1863,7 @@ def test_independent_datasets_extraction( { "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", + "type": "Workspace", "state": "Active", "datasets": [ { diff --git a/metadata-ingestion/tests/integration/powerbi/test_profiling.py b/metadata-ingestion/tests/integration/powerbi/test_profiling.py index 7955386de8940..4b48bed003b1e 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_profiling.py +++ b/metadata-ingestion/tests/integration/powerbi/test_profiling.py @@ -112,21 +112,28 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None "status_code": 200, "json": admin_datasets_response, }, - "https://api.powerbi.com/v1.0/myorg/groups?%24top=1000&%24skip=0&%24filter=type+eq+%27Workspace%27": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 3, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "demo-workspace", "type": "Workspace", + "state": "Active", } ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -176,6 +183,7 @@ def register_mock_admin_api(request_mock: Any, 
override_data: dict = {}) -> None "id": "64ED5CAD-7C10-4684-8180-826122881108", "name": "demo-workspace", "state": "Active", + "type": "Workspace", "datasets": [ { "id": "05169CD2-E713-41E6-9600-1D8066D95445", diff --git a/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py b/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py index 077b48ca177b5..84f7a87ce5d2d 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py +++ b/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py @@ -23,27 +23,35 @@ def register_mock_api_state1(request_mock): "status_code": 403, "json": {}, }, - "https://api.powerbi.com/v1.0/myorg/groups": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { - "@odata.count": 1, "value": [ { "id": "64ED5CAD-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "Workspace 1", "type": "Workspace", + "state": "Active", }, { "id": "44444444-7C10-4684-8180-826122881108", "isReadOnly": True, "name": "Multi Workspace", "type": "Workspace", + "state": "Active", }, ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, @@ -114,7 +122,7 @@ def register_mock_api_state2(request_mock): "status_code": 403, "json": {}, }, - "https://api.powerbi.com/v1.0/myorg/groups": { + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": { "method": "GET", "status_code": 200, "json": { @@ -135,6 +143,13 @@ def register_mock_api_state2(request_mock): ], }, }, + "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": { + "method": "GET", + "status_code": 200, + "json": { + "value": [], + }, + }, 
"https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards": { "method": "GET", "status_code": 200, diff --git a/metadata-ingestion/tests/unit/test_mlflow_source.py b/metadata-ingestion/tests/unit/test_mlflow_source.py index ae5a42bad229d..d213dd92352e6 100644 --- a/metadata-ingestion/tests/unit/test_mlflow_source.py +++ b/metadata-ingestion/tests/unit/test_mlflow_source.py @@ -1,6 +1,6 @@ import datetime from pathlib import Path -from typing import Any, TypeVar, Union +from typing import Any, Union import pytest from mlflow import MlflowClient @@ -11,8 +11,6 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.mlflow import MLflowConfig, MLflowSource -T = TypeVar("T") - @pytest.fixture def tracking_uri(tmp_path: Path) -> str: @@ -46,7 +44,7 @@ def model_version( ) -def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]: +def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[str]: dummy_pages = dict( page_1=PagedList(items=["a", "b"], token="page_2"), page_2=PagedList(items=["c", "d"], token="page_3"), diff --git a/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh b/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh index 81d6a541d1c2a..854c4227d08d9 100755 --- a/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh +++ b/metadata-integration/java/spark-lineage-legacy/scripts/check_jar.sh @@ -40,7 +40,8 @@ jar -tvf $jarFile |\ grep -v "rootdoc.txt" |\ grep -v "VersionInfo.java" |\ grep -v "mime.types" |\ - grep -v "com/ibm/.*" + grep -v "com/ibm/.*" |\ + grep -v "google/" if [ $? 
-ne 0 ]; then diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 34c98bba01af4..00feb547ca330 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1357,6 +1357,7 @@ private Stream ingestProposalSync( return IngestResult.builder() .urn(item.getUrn()) .request(item) + .result(result) .publishedMCL(result.getMclFuture() != null) .sqlCommitted(true) .isUpdate(result.getOldValue() != null) diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml index b1612f95f9219..10ae176b2c31e 100644 --- a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml @@ -14,18 +14,26 @@ bootstrap: - name: data-platforms version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/data-platforms.yaml" - name: data-types version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/data-types.yaml" - name: ownership-types version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/ownership-types.yaml" - name: roles version: v1 + blocking: true + async: false mcps_location: "bootstrap_mcps/roles.yaml" # Ingestion Recipes diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 7e7929e7f27d3..7427f293c848f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -170,6 +170,9 @@ protected abstract E buildGenericEntity( @Nonnull UpdateAspectResult updateAspectResult, boolean withSystemMetadata); + protected abstract E buildGenericEntity( + @Nonnull String aspectName, @Nonnull IngestResult ingestResult, boolean withSystemMetadata); + protected abstract AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) throws JsonProcessingException, InvalidUrnException; @@ -560,8 +563,11 @@ public ResponseEntity createAspect( @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, @PathVariable("aspectName") String aspectName, + @RequestParam(value = "async", required = false, defaultValue = "false") Boolean async, @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata, + @RequestParam(value = "createIfEntityNotExists", required = false, defaultValue = "false") + Boolean createIfEntityNotExists, @RequestParam(value = "createIfNotExists", required = false, defaultValue = "true") Boolean createIfNotExists, @RequestBody @Nonnull String jsonAspect) @@ -591,24 +597,38 @@ public ResponseEntity createAspect( opContext.getRetrieverContext().get().getAspectRetriever(), urn, aspectSpec, + createIfEntityNotExists, createIfNotExists, jsonAspect, authentication.getActor()); - List results = - entityService.ingestAspects( + Set results = + entityService.ingestProposal( opContext, AspectsBatchImpl.builder() .retrieverContext(opContext.getRetrieverContext().get()) .items(List.of(upsert)) .build(), - true, - true); + async); - return ResponseEntity.of( - results.stream() - .findFirst() - .map(result -> buildGenericEntity(aspectName, result, withSystemMetadata))); + if (!async) { + return ResponseEntity.of( + results.stream() + .filter(item -> aspectName.equals(item.getRequest().getAspectName())) + 
.findFirst() + .map( + result -> + buildGenericEntity(aspectName, result.getResult(), withSystemMetadata))); + } else { + return results.stream() + .filter(item -> aspectName.equals(item.getRequest().getAspectName())) + .map( + result -> + ResponseEntity.accepted() + .body(buildGenericEntity(aspectName, result, withSystemMetadata))) + .findFirst() + .orElse(ResponseEntity.accepted().build()); + } } @Tag(name = "Generic Aspects") @@ -789,6 +809,7 @@ protected abstract ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, + Boolean createIfEntityNotExists, Boolean createIfNotExists, String jsonAspect, Actor actor) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 28537b849b68a..7bec052a9fd5d 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -232,6 +232,20 @@ protected GenericEntityV2 buildGenericEntity( withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null))); } + @Override + protected GenericEntityV2 buildGenericEntity( + @Nonnull String aspectName, @Nonnull IngestResult ingestResult, boolean withSystemMetadata) { + return GenericEntityV2.builder() + .urn(ingestResult.getUrn().toString()) + .build( + objectMapper, + Map.of( + aspectName, + Pair.of( + ingestResult.getRequest().getRecordTemplate(), + withSystemMetadata ? 
ingestResult.getRequest().getSystemMetadata() : null))); + } + private List toRecordTemplates( @Nonnull OperationContext opContext, SearchEntityArray searchEntities, @@ -278,14 +292,25 @@ protected ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, + Boolean createIfEntityNotExists, Boolean createIfNotExists, String jsonAspect, Actor actor) throws URISyntaxException { + + final ChangeType changeType; + if (Boolean.TRUE.equals(createIfEntityNotExists)) { + changeType = ChangeType.CREATE_ENTITY; + } else if (Boolean.TRUE.equals(createIfNotExists)) { + changeType = ChangeType.CREATE; + } else { + changeType = ChangeType.UPSERT; + } + return ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectSpec.getName()) - .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT) + .changeType(changeType) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) .recordTemplate( GenericRecordUtils.deserializeAspect( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index e33ad24a6c248..d179ea8f3a068 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -1100,6 +1100,28 @@ private static PathItem buildSingleEntityAspectPath( new Operation() .summary(String.format("Create aspect %s on %s ", aspect, upperFirstEntity)) .tags(tags) + .parameters( + List.of( + new Parameter() + .in(NAME_QUERY) + .name("async") + .description("Use async ingestion for high throughput.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .in(NAME_QUERY) + .name(NAME_SYSTEM_METADATA) + .description("Include systemMetadata with response.") + .schema(new 
Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .in(NAME_QUERY) + .name("createIfEntityNotExists") + .description("Only create the aspect if the Entity doesn't exist.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .in(NAME_QUERY) + .name("createIfNotExists") + .description("Only create the aspect if the Aspect doesn't exist.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(true)))) .requestBody(requestBody) .responses(new ApiResponses().addApiResponse("201", successPostResponse)); // Patch Operation diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index c7d8c72f8a1c3..55cf310be3438 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -328,6 +328,24 @@ protected GenericEntityV3 buildGenericEntity( .build())); } + @Override + protected GenericEntityV3 buildGenericEntity( + @Nonnull String aspectName, @Nonnull IngestResult ingestResult, boolean withSystemMetadata) { + return GenericEntityV3.builder() + .build( + objectMapper, + ingestResult.getUrn(), + Map.of( + aspectName, + AspectItem.builder() + .aspect(ingestResult.getRequest().getRecordTemplate()) + .systemMetadata( + withSystemMetadata ? ingestResult.getRequest().getSystemMetadata() : null) + .auditStamp( + withSystemMetadata ? 
ingestResult.getRequest().getAuditStamp() : null) + .build())); + } + private List toRecordTemplates( @Nonnull OperationContext opContext, SearchEntityArray searchEntities, @@ -472,16 +490,27 @@ protected ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, AspectSpec aspectSpec, + Boolean createIfEntityNotExists, Boolean createIfNotExists, String jsonAspect, Actor actor) throws JsonProcessingException { JsonNode jsonNode = objectMapper.readTree(jsonAspect); String aspectJson = jsonNode.get("value").toString(); + + final ChangeType changeType; + if (Boolean.TRUE.equals(createIfEntityNotExists)) { + changeType = ChangeType.CREATE_ENTITY; + } else if (Boolean.TRUE.equals(createIfNotExists)) { + changeType = ChangeType.CREATE; + } else { + changeType = ChangeType.UPSERT; + } + return ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectSpec.getName()) - .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT) + .changeType(changeType) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) .recordTemplate( GenericRecordUtils.deserializeAspect( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java index d3f8b507bb14a..f8b76db110c08 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.batch.BatchItem; +import javax.annotation.Nullable; import lombok.Builder; import lombok.Value; @@ -10,6 +11,7 @@ public class IngestResult { Urn urn; BatchItem request; + @Nullable UpdateAspectResult result; boolean publishedMCL; boolean processedMCL; boolean publishedMCP; diff --git a/smoke-test/tests/platform_resources/test_platform_resource.py 
b/smoke-test/tests/platform_resources/test_platform_resource.py index 09d2503179572..7c53f72d843c9 100644 --- a/smoke-test/tests/platform_resources/test_platform_resource.py +++ b/smoke-test/tests/platform_resources/test_platform_resource.py @@ -99,3 +99,16 @@ def test_platform_resource_search(graph_client, test_id, cleanup_resources): ] assert len(search_results) == 1 assert search_results[0] == platform_resource + + +def test_platform_resource_non_existent(graph_client, test_id): + key = PlatformResourceKey( + platform=f"test_platform_{test_id}", + resource_type=f"test_resource_type_{test_id}", + primary_key=f"test_primary_key_{test_id}", + ) + platform_resource = PlatformResource.from_datahub( + key=key, + graph_client=graph_client, + ) + assert platform_resource is None