From 2755cf3559fe04a982326cca78ec44df12cb1be0 Mon Sep 17 00:00:00 2001
From: sid-acryl <155424659+sid-acryl@users.noreply.github.com>
Date: Wed, 7 Aug 2024 09:32:21 +0530
Subject: [PATCH 01/72] fix(ingest/powerbi): fix broken lineage between chart
and dataset (#11080)
---
.../ingestion/source/powerbi/powerbi.py | 15 +-
.../src/datahub/utilities/urns/urn_iter.py | 6 +-
.../golden_test_lower_case_urn_ingest.json | 696 +++++++++---------
.../tests/integration/powerbi/test_powerbi.py | 2 +
4 files changed, 368 insertions(+), 351 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index e0a72c71a1ef0..a2d841c3f8fdc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -89,6 +89,7 @@
from datahub.metadata.urns import ChartUrn
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
from datahub.utilities.dedup_list import deduplicate_list
+from datahub.utilities.urns.urn_iter import lowercase_dataset_urn
# Logger instance
logger = logging.getLogger(__name__)
@@ -127,7 +128,7 @@ def __init__(
@staticmethod
def urn_to_lowercase(value: str, flag: bool) -> str:
if flag is True:
- return value.lower()
+ return lowercase_dataset_urn(value)
return value
@@ -390,11 +391,13 @@ def to_datahub_dataset(
for table in dataset.tables:
# Create a URN for dataset
- ds_urn = builder.make_dataset_urn_with_platform_instance(
- platform=self.__config.platform_name,
- name=self.assets_urn_to_lowercase(table.full_name),
- platform_instance=self.__config.platform_instance,
- env=self.__config.env,
+ ds_urn = self.assets_urn_to_lowercase(
+ builder.make_dataset_urn_with_platform_instance(
+ platform=self.__config.platform_name,
+ name=table.full_name,
+ platform_instance=self.__config.platform_instance,
+ env=self.__config.env,
+ )
)
logger.debug(f"dataset_urn={ds_urn}")
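
The behavioral change in this hunk: the URN is now lowercased after construction, and only its dataset-name segment is touched. Calling value.lower() on a whole URN also lowercases case-sensitive segments such as the "dataPlatform" prefix and the env, which can leave chart inputs pointing at URNs that no longer match the emitted datasets. A minimal standalone sketch of the difference (the parser below is a simplified, hypothetical stand-in for DatasetUrn.from_string, not the DataHub implementation):

def lowercase_dataset_urn(dataset_urn: str) -> str:
    # Assumed layout: urn:li:dataset:(urn:li:dataPlatform:<platform>,<name>,<env>)
    prefix = "urn:li:dataset:("
    platform, name, env = dataset_urn[len(prefix):-1].split(",")
    # Only the dataset name is lowercased; platform and env keep their case.
    return f"{prefix}{platform},{name.lower()},{env})"

urn = "urn:li:dataset:(urn:li:dataPlatform:powerbi,myPlatformInstance.Library-Dataset,PROD)"
print(urn.lower())                 # ...dataplatform:powerbi,...,prod) ; everything lowercased
print(lowercase_dataset_urn(urn))  # ...dataPlatform:powerbi,myplatforminstance.library-dataset,PROD)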
diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
index 5bef17119675e..f0e4c6f5ee14a 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
@@ -131,7 +131,7 @@ def _modify_at_path(
_modify_at_path(getattr(model, path[0]), path[1:], new_value)
-def _lowercase_dataset_urn(dataset_urn: str) -> str:
+def lowercase_dataset_urn(dataset_urn: str) -> str:
cur_urn = DatasetUrn.from_string(dataset_urn)
new_urn = DatasetUrn(
platform=cur_urn.platform, name=cur_urn.name.lower(), env=cur_urn.env
@@ -149,10 +149,10 @@ def lowercase_dataset_urns(
) -> None:
def modify_urn(urn: str) -> str:
if guess_entity_type(urn) == "dataset":
- return _lowercase_dataset_urn(urn)
+ return lowercase_dataset_urn(urn)
elif guess_entity_type(urn) == "schemaField":
cur_urn = Urn.from_string(urn)
- cur_urn._entity_ids[0] = _lowercase_dataset_urn(cur_urn._entity_ids[0])
+ cur_urn._entity_ids[0] = lowercase_dataset_urn(cur_urn._entity_ids[0])
return str(cur_urn)
return urn
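
For schemaField URNs, lowercase_dataset_urn is applied only to the embedded dataset URN, so field paths keep their case. A rough, self-contained illustration assuming the layout urn:li:schemaField:(<dataset urn>,<fieldPath>); this mirrors, rather than reuses, the library code:

def lowercase_dataset_urn(dataset_urn: str) -> str:
    prefix = "urn:li:dataset:("
    platform, name, env = dataset_urn[len(prefix):-1].split(",")
    return f"{prefix}{platform},{name.lower()},{env})"

def lowercase_schema_field_urn(field_urn: str) -> str:
    prefix = "urn:li:schemaField:("
    dataset_urn, field_path = field_urn[len(prefix):-1].rsplit(",", 1)
    # Normalize the dataset portion only; the field path is left untouched.
    return f"{prefix}{lowercase_dataset_urn(dataset_urn)},{field_path})"

print(lowercase_schema_field_urn(
    "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,Demo.Table,PROD),Sales_Amount)"
))
# urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,demo.table,PROD),Sales_Amount)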
diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json
index d80aa02c4cb12..a4eb670a4b7f9 100644
--- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json
+++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json
@@ -1,7 +1,7 @@
[
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
@@ -17,9 +17,25 @@
"lastRunId": "no-run-id-provided"
}
},
+{
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User1@foo.com",
+ "changeType": "UPSERT",
+ "aspectName": "corpUserKey",
+ "aspect": {
+ "json": {
+ "username": "User1@foo.com"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -40,13 +56,13 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User2@foo.com",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "corpUserKey",
"aspect": {
"json": {
- "removed": false
+ "username": "User2@foo.com"
}
},
"systemMetadata": {
@@ -57,15 +73,14 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Dataset Table",
- "View"
- ]
+ "materialized": false,
+ "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
@@ -76,14 +91,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "status",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table",
- "viewLanguage": "m_query"
+ "removed": false
}
},
"systemMetadata": {
@@ -94,7 +107,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -115,8 +128,8 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)",
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User1@foo.com",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -132,15 +145,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "status",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Dataset Table",
- "View"
- ]
+ "removed": false
}
},
"systemMetadata": {
@@ -151,14 +161,15 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"",
- "viewLanguage": "m_query"
+ "typeNames": [
+ "PowerBI Dataset Table",
+ "View"
+ ]
}
},
"systemMetadata": {
@@ -169,18 +180,15 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)",
"changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "customProperties": {
- "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
- },
- "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
- "name": "snowflake native-query",
- "description": "Library dataset description",
- "tags": []
+ "typeNames": [
+ "PowerBI Dataset Table",
+ "View"
+ ]
}
},
"systemMetadata": {
@@ -190,8 +198,8 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User2@foo.com",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -207,15 +215,14 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Dataset Table",
- "View"
- ]
+ "materialized": false,
+ "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
@@ -226,13 +233,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
- "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"",
+ "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"",
"viewLanguage": "m_query"
}
},
@@ -244,7 +251,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -266,23 +273,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@@ -301,14 +292,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "status",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source",
- "viewLanguage": "m_query"
+ "removed": false
}
},
"systemMetadata": {
@@ -319,7 +308,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -328,7 +317,7 @@
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
},
"externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
- "name": "snowflake native-query-with-join",
+ "name": "snowflake native-query",
"description": "Library dataset description",
"tags": []
}
@@ -341,12 +330,15 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "removed": false
+ "typeNames": [
+ "PowerBI Dataset Table",
+ "View"
+ ]
}
},
"systemMetadata": {
@@ -357,15 +349,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "status",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Dataset Table",
- "View"
- ]
+ "removed": false
}
},
"systemMetadata": {
@@ -376,14 +365,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "status",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1",
- "viewLanguage": "m_query"
+ "removed": false
}
},
"systemMetadata": {
@@ -394,7 +381,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -416,12 +403,33 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "removed": false
+ "typeNames": [
+ "PowerBI Dataset Table",
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.domain.com\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
@@ -432,7 +440,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@@ -451,14 +459,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "status",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date",
- "viewLanguage": "m_query"
+ "removed": false
}
},
"systemMetadata": {
@@ -469,7 +475,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -478,7 +484,7 @@
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
},
"externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
- "name": "postgres_test_table",
+ "name": "snowflake native-query-with-join",
"description": "Library dataset description",
"tags": []
}
@@ -491,12 +497,14 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "removed": false
+ "materialized": false,
+ "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
@@ -507,15 +515,14 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Dataset Table",
- "View"
- ]
+ "materialized": false,
+ "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
@@ -525,15 +532,52 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "chartInfo",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue",
- "viewLanguage": "m_query"
+ "customProperties": {
+ "createdFrom": "Dataset",
+ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
+ "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details"
+ },
+ "title": "test_tile",
+ "description": "test_tile",
+ "lastModified": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ },
+ "inputs": [
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)"
+ },
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_testtable,PROD)"
+ },
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query,PROD)"
+ },
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.big-query-with-parameter,PROD)"
+ },
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.snowflake_native-query-with-join,PROD)"
+ },
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.job-history,PROD)"
+ },
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)"
+ }
+ ]
}
},
"systemMetadata": {
@@ -544,17 +588,17 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {
- "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
+ "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
},
- "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details",
- "name": "dbo_book_issue",
- "description": "hr pbi test description",
+ "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+ "name": "postgres_test_table",
+ "description": "Library dataset description",
"tags": []
}
},
@@ -565,13 +609,15 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "removed": false
+ "typeNames": [
+ "PowerBI Tile"
+ ]
}
},
"systemMetadata": {
@@ -582,15 +628,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "status",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Dataset Table",
- "View"
- ]
+ "removed": false
}
},
"systemMetadata": {
@@ -600,15 +643,15 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
+ "aspectName": "browsePaths",
"aspect": {
"json": {
- "materialized": false,
- "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"",
- "viewLanguage": "m_query"
+ "paths": [
+ "/powerbi/demo-workspace"
+ ]
}
},
"systemMetadata": {
@@ -619,18 +662,15 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.postgres_test_table,PROD)",
"changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "customProperties": {
- "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
- },
- "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details",
- "name": "ms_sql_native_table",
- "description": "hr pbi test description",
- "tags": []
+ "typeNames": [
+ "PowerBI Dataset Table",
+ "View"
+ ]
}
},
"systemMetadata": {
@@ -640,8 +680,49 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "changeType": "UPSERT",
+ "aspectName": "chartKey",
+ "aspect": {
+ "json": {
+ "dashboardTool": "powerbi",
+ "chartId": "myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)"
+ },
+ {
+ "id": "demo-workspace"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -657,7 +738,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@@ -675,13 +756,13 @@
}
},
{
- "entityType": "corpuser",
- "entityUrn": "urn:li:corpuser:users.User1@foo.com",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)",
"changeType": "UPSERT",
- "aspectName": "corpUserKey",
+ "aspectName": "status",
"aspect": {
"json": {
- "username": "User1@foo.com"
+ "removed": false
}
},
"systemMetadata": {
@@ -691,13 +772,19 @@
}
},
{
- "entityType": "corpuser",
- "entityUrn": "urn:li:corpuser:users.User2@foo.com",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)",
"changeType": "UPSERT",
- "aspectName": "corpUserKey",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
- "username": "User2@foo.com"
+ "customProperties": {
+ "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
+ },
+ "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details",
+ "name": "dbo_book_issue",
+ "description": "hr pbi test description",
+ "tags": []
}
},
"systemMetadata": {
@@ -707,51 +794,33 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)",
"changeType": "UPSERT",
- "aspectName": "chartInfo",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "customProperties": {
- "createdFrom": "Dataset",
- "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
- "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details"
- },
- "title": "test_tile",
- "description": "test_tile",
- "lastModified": {
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- },
- "inputs": [
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
- },
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)"
- },
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
- },
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
- },
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
- },
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
- },
- {
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
- }
+ "materialized": false,
+ "viewLogic": "let\n Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue",
+ "viewLanguage": "m_query"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "PowerBI Dataset Table",
+ "View"
]
}
},
@@ -762,8 +831,8 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -778,15 +847,19 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
- "typeNames": [
- "PowerBI Tile"
- ]
+ "customProperties": {
+ "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
+ },
+ "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details",
+ "name": "ms_sql_native_table",
+ "description": "hr pbi test description",
+ "tags": []
}
},
"systemMetadata": {
@@ -796,14 +869,15 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)",
"changeType": "UPSERT",
- "aspectName": "chartKey",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "dashboardTool": "powerbi",
- "chartId": "charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0"
+ "materialized": false,
+ "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
@@ -813,15 +887,27 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"changeType": "UPSERT",
- "aspectName": "browsePaths",
+ "aspectName": "ownership",
"aspect": {
"json": {
- "paths": [
- "/powerbi/demo-workspace"
- ]
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:users.User1@foo.com",
+ "type": "NONE"
+ },
+ {
+ "owner": "urn:li:corpuser:users.User2@foo.com",
+ "type": "NONE"
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
}
},
"systemMetadata": {
@@ -831,17 +917,14 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "dashboardKey",
"aspect": {
"json": {
- "path": [
- {
- "id": "demo-workspace"
- }
- ]
+ "dashboardTool": "powerbi",
+ "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE"
}
},
"systemMetadata": {
@@ -852,7 +935,7 @@
},
{
"entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
"changeType": "UPSERT",
"aspectName": "chartInfo",
"aspect": {
@@ -876,10 +959,10 @@
},
"inputs": [
{
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)"
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.dbo_book_issue,PROD)"
},
{
- "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)"
+ "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.hr_pbi_test.ms_sql_native_table,PROD)"
}
]
}
@@ -892,7 +975,7 @@
},
{
"entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -907,43 +990,8 @@
}
},
{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
- "changeType": "UPSERT",
- "aspectName": "chartKey",
- "aspect": {
- "json": {
- "dashboardTool": "powerbi",
- "chartId": "charts.23212598-23b5-4980-87cc-5fc0ecd84385"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
- "changeType": "UPSERT",
- "aspectName": "subTypes",
- "aspect": {
- "json": {
- "typeNames": [
- "PowerBI Tile"
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"changeType": "UPSERT",
"aspectName": "browsePaths",
"aspect": {
@@ -961,15 +1009,13 @@
},
{
"entityType": "chart",
- "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "path": [
- {
- "id": "demo-workspace"
- }
+ "typeNames": [
+ "PowerBI Tile"
]
}
},
@@ -980,15 +1026,14 @@
}
},
{
- "entityType": "dashboard",
- "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
"changeType": "UPSERT",
- "aspectName": "browsePaths",
+ "aspectName": "chartKey",
"aspect": {
"json": {
- "paths": [
- "/powerbi/demo-workspace"
- ]
+ "dashboardTool": "powerbi",
+ "chartId": "myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385"
}
},
"systemMetadata": {
@@ -999,7 +1044,7 @@
},
{
"entityType": "dashboard",
- "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+ "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"changeType": "PATCH",
"aspectName": "dashboardInfo",
"aspect": {
@@ -1031,13 +1076,13 @@
},
{
"op": "add",
- "path": "/charts/urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
- "value": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)"
+ "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+ "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)"
},
{
"op": "add",
- "path": "/charts/urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
- "value": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)"
+ "path": "/charts/urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+ "value": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)"
},
{
"op": "add",
@@ -1067,30 +1112,21 @@
}
},
{
- "entityType": "dashboard",
- "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dashboard",
- "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
"changeType": "UPSERT",
- "aspectName": "dashboardKey",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "dashboardTool": "powerbi",
- "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE"
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)"
+ },
+ {
+ "id": "demo-workspace"
+ }
+ ]
}
},
"systemMetadata": {
@@ -1100,27 +1136,15 @@
}
},
{
- "entityType": "dashboard",
- "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+ "entityType": "chart",
+ "entityUrn": "urn:li:chart:(powerbi,myPlatformInstance.charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
"changeType": "UPSERT",
- "aspectName": "ownership",
+ "aspectName": "browsePaths",
"aspect": {
"json": {
- "owners": [
- {
- "owner": "urn:li:corpuser:users.User1@foo.com",
- "type": "NONE"
- },
- {
- "owner": "urn:li:corpuser:users.User2@foo.com",
- "type": "NONE"
- }
- ],
- "ownerTypes": {},
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
+ "paths": [
+ "/powerbi/demo-workspace"
+ ]
}
},
"systemMetadata": {
@@ -1131,12 +1155,16 @@
},
{
"entityType": "dashboard",
- "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+ "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,myPlatformInstance)"
+ },
{
"id": "demo-workspace"
}
@@ -1150,24 +1178,8 @@
}
},
{
- "entityType": "corpuser",
- "entityUrn": "urn:li:corpuser:users.User1@foo.com",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "corpuser",
- "entityUrn": "urn:li:corpuser:users.User2@foo.com",
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,myPlatformInstance.dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index 6a95ec2c1dda4..23b23ecada0d4 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -819,6 +819,8 @@ def test_powerbi_ingest_urn_lower_case(
"type": "powerbi",
"config": {
**default_source_config(),
+ "env": "PROD",
+ "platform_instance": "myPlatformInstance",
"convert_urns_to_lowercase": True,
"convert_lineage_urns_to_lowercase": True,
},
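
The two new config keys are what introduce the platform instance and PROD env into the golden URNs above. A simplified, hypothetical stand-in for make_dataset_urn_with_platform_instance (the real builder lives in datahub.emitter.mce_builder) makes the expected shape explicit:

def make_dataset_urn_with_platform_instance(
    platform: str, name: str, platform_instance: str, env: str
) -> str:
    # Assumption: the builder prefixes the dataset name with the platform instance.
    return f"urn:li:dataset:(urn:li:dataPlatform:{platform},{platform_instance}.{name},{env})"

urn = make_dataset_urn_with_platform_instance(
    "powerbi", "library-dataset.public_issue_history", "myPlatformInstance", "PROD"
)
print(urn)
# With convert_urns_to_lowercase enabled, only the name segment is then lowercased:
# urn:li:dataset:(urn:li:dataPlatform:powerbi,myplatforminstance.library-dataset.public_issue_history,PROD)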
From 8bea5d2a3da503b5d4381bbf6dd0f9c0d2ce8d7b Mon Sep 17 00:00:00 2001
From: sid-acryl <155424659+sid-acryl@users.noreply.github.com>
Date: Wed, 7 Aug 2024 09:33:14 +0530
Subject: [PATCH 02/72] feat(ingest/lookml): CLL support for sql set in
sql_table_name attribute of lookml view (#11069)
---
.../ingestion/source/looker/looker_source.py | 21 --
.../source/looker/looker_template_language.py | 17 +-
.../source/looker/lookml_concept_context.py | 70 ++++-
.../ingestion/source/looker/view_upstream.py | 51 +++-
.../looker/golden_test_ingest.json | 34 ---
.../data.model.lkml | 4 +
.../rent_as_employee_income_source.view.lkml | 27 ++
.../vv_lineage_liquid_template_golden.json | 248 ++++++++++++++++++
8 files changed, 394 insertions(+), 78 deletions(-)
create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index d61458d8e924a..ef329da930dda 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -96,13 +96,11 @@
ChartTypeClass,
ContainerClass,
DashboardInfoClass,
- DataPlatformInfoClass,
InputFieldClass,
InputFieldsClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
- PlatformTypeClass,
SubTypesClass,
)
from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
@@ -1573,25 +1571,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
- # Emit platform instance entity
- if self.source_config.platform_instance:
- platform_instance_urn = builder.make_dataplatform_instance_urn(
- platform=self.source_config.platform_name,
- instance=self.source_config.platform_instance,
- )
-
- yield MetadataWorkUnit(
- id=f"{platform_instance_urn}-aspect-dataplatformInfo",
- mcp=MetadataChangeProposalWrapper(
- entityUrn=platform_instance_urn,
- aspect=DataPlatformInfoClass(
- name=self.source_config.platform_instance,
- type=PlatformTypeClass.OTHERS,
- datasetNameDelimiter=".",
- ),
- ),
- )
-
with self.reporter.report_stage("dashboard_chart_metadata"):
for job in BackpressureAwareExecutor.map(
self.process_dashboard,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
index 2c523fcd98d08..99f83b5e922ba 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
@@ -10,9 +10,6 @@
create_template,
)
from datahub.ingestion.source.looker.lookml_config import DERIVED_VIEW_PATTERN
-from datahub.ingestion.source.looker.str_functions import (
- remove_extra_spaces_and_newlines,
-)
logger = logging.getLogger(__name__)
@@ -95,6 +92,11 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
return text
+def _drop_derived_view_pattern(value: str) -> str:
+ # Drop ${ and }
+ return re.sub(DERIVED_VIEW_PATTERN, r"\1", value)
+
+
def _complete_incomplete_sql(raw_view: dict, sql: str) -> str:
# Looker supports sql fragments that omit the SELECT and FROM parts of the query
@@ -109,8 +111,7 @@ def _complete_incomplete_sql(raw_view: dict, sql: str) -> str:
# add a FROM clause at the end
sql_query = f"{sql_query} FROM {raw_view['name']}"
- # Drop ${ and }
- return re.sub(DERIVED_VIEW_PATTERN, r"\1", sql_query)
+ return _drop_derived_view_pattern(sql_query)
def resolve_liquid_variable_in_view_dict(
@@ -122,10 +123,14 @@ def resolve_liquid_variable_in_view_dict(
for view in raw_view["views"]:
if "sql_table_name" in view:
view["datahub_transformed_sql_table_name"] = resolve_liquid_variable(
- text=remove_extra_spaces_and_newlines(view["sql_table_name"]),
+ text=view["sql_table_name"],
liquid_variable=liquid_variable,
) # keeping original sql_table_name as is to avoid any visualization issue later
+ view["datahub_transformed_sql_table_name"] = _drop_derived_view_pattern(
+ value=view["datahub_transformed_sql_table_name"]
+ )
+
if "derived_table" in view and "sql" in view["derived_table"]:
# In sql we don't need to remove the extra spaces as sql parser takes care of extra spaces and \n
# while generating URN from sql
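
The extracted _drop_derived_view_pattern helper strips the ${ and } wrapper while keeping the inner reference, and is now also applied to datahub_transformed_sql_table_name. A short sketch; the regex literal below is an assumed stand-in, the real DERIVED_VIEW_PATTERN constant lives in datahub.ingestion.source.looker.lookml_config:

import re

DERIVED_VIEW_PATTERN = r"\$\{(.+?)\}"  # assumed shape, not the real constant

def _drop_derived_view_pattern(value: str) -> str:
    # Keep only the captured inner reference.
    return re.sub(DERIVED_VIEW_PATTERN, r"\1", value)

print(_drop_derived_view_pattern("${my_view.SQL_TABLE_NAME}"))  # my_view.SQL_TABLE_NAME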
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
index a83aa2638ec96..7805b8b7b7d9a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py
@@ -11,12 +11,14 @@
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
from datahub.ingestion.source.looker.lookml_config import (
- DERIVED_VIEW_PATTERN,
DERIVED_VIEW_SUFFIX,
NAME,
LookMLSourceReport,
)
from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver
+from datahub.ingestion.source.looker.str_functions import (
+ remove_extra_spaces_and_newlines,
+)
logger = logging.getLogger(__name__)
@@ -56,7 +58,7 @@ def column_name_in_sql_attribute(self) -> List[str]:
class LookerViewContext:
"""
- There are six patterns to associate the view's fields with dataset
+     There are seven patterns to associate the view's fields with a dataset
Pattern1:
view: view_name {
@@ -161,6 +163,36 @@ class LookerViewContext:
For all possible options of "sql" attribute please refer looker doc:
https://cloud.google.com/looker/docs/reference/param-field-sql
+    For pattern 6, i.e. view.derived.sql, Looker creates a temporary table to store the sql result.
+    However, if we don't want a temporary table and instead want Looker to execute the sql on every query,
+    then pattern 7 (shown below) is useful.
+
+ Pattern7:
+ view: customer_sales {
+ sql_table_name: (
+ SELECT
+ customer_id,
+ SUM(sales_amount) AS total_sales
+ FROM
+ sales
+ GROUP BY
+ customer_id
+ ) ;;
+
+ dimension: customer_id {
+ sql: ${TABLE}.customer_id ;;
+ }
+
+ measure: total_sales {
+ type: sum
+ sql: ${TABLE}.total_sales ;;
+ }
+ }
+
+
+    In Pattern 7, the fields' upstream dataset is the output of the sql mentioned in
+    customer_sales.sql_table_name.
+
"""
raw_view: Dict
@@ -252,6 +284,7 @@ def _get_sql_table_name_field(self) -> Optional[str]:
return self.get_including_extends(field="sql_table_name")
def _is_dot_sql_table_name_present(self) -> bool:
+
sql_table_name: Optional[str] = self._get_sql_table_name_field()
if sql_table_name is None:
@@ -268,7 +301,7 @@ def sql_table_name(self) -> str:
if sql_table_name is None:
sql_table_name = self.raw_view[NAME].lower()
- return sql_table_name
+ return sql_table_name.lower()
def datahub_transformed_sql_table_name(self) -> str:
table_name: Optional[str] = self.raw_view.get(
@@ -278,13 +311,13 @@ def datahub_transformed_sql_table_name(self) -> str:
if not table_name:
table_name = self.sql_table_name()
- # sql_table_name is in the format "${view-name}.SQL_TABLE_NAME"
- # remove extra characters
- if self._is_dot_sql_table_name_present():
- table_name = re.sub(DERIVED_VIEW_PATTERN, r"\1", table_name)
+        # remove extra spaces and new lines from sql_table_name if it is not a sql query
+ if not self.is_direct_sql_query_case():
+ table_name = remove_extra_spaces_and_newlines(table_name)
+ # Some sql_table_name fields contain quotes like: optimizely."group", just remove the quotes
+ table_name = table_name.replace('"', "").replace("`", "").lower()
- # Some sql_table_name fields contain quotes like: optimizely."group", just remove the quotes
- return table_name.replace('"', "").replace("`", "").lower()
+ return table_name
def derived_table(self) -> Dict[Any, Any]:
"""
@@ -371,6 +404,11 @@ def is_materialized_derived_view(self) -> bool:
def is_regular_case(self) -> bool:
# regular-case is pattern1 and 2 where upstream table is either view-name or
# table name mentioned in sql_table_name attribute
+
+    # It should not be the direct sql query case (pattern 7)
+ if self.is_direct_sql_query_case():
+ return False
+
if (
self.is_sql_table_name_referring_to_view()
or self.is_sql_based_derived_case()
@@ -381,6 +419,9 @@ def is_regular_case(self) -> bool:
return True
def is_sql_table_name_referring_to_view(self) -> bool:
+ if self.is_direct_sql_query_case():
+ return False
+
# It is pattern3
return self._is_dot_sql_table_name_present()
@@ -413,3 +454,14 @@ def is_sql_based_derived_view_without_fields_case(self) -> bool:
return True
return False
+
+ def is_direct_sql_query_case(self) -> bool:
+ # pattern 7
+ # sqlglot doesn't have a function to validate whether text is valid SQL or not.
+        # Apply a simple heuristic to check whether sql_table_name contains a sql query:
+        # if it does, its value starts with "(" and the keyword "select" is present
+        # inside the text.
+ return (
+ self.sql_table_name().strip().startswith("(")
+ and "select" in self.sql_table_name()
+ )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
index 98646e19a7014..d5929b52aea3a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py
@@ -237,7 +237,7 @@ def create_fields(self) -> List[ViewField]:
return [] # it is for the special case
-class SqlBasedDerivedViewUpstream(AbstractViewUpstream):
+class SqlBasedDerivedViewUpstream(AbstractViewUpstream, ABC):
"""
Handle the case where upstream dataset is defined in derived_table.sql
"""
@@ -263,7 +263,7 @@ def __get_spr(self) -> Optional[SqlParsingResult]:
return None
spr = create_lineage_sql_parsed_result(
- query=self.view_context.datahub_transformed_sql(),
+ query=self.get_sql_query(),
default_schema=self.view_context.view_connection.default_schema,
default_db=self.view_context.view_connection.default_db,
platform=self.view_context.view_connection.platform,
@@ -390,6 +390,28 @@ def get_upstream_column_ref(
def get_upstream_dataset_urn(self) -> List[Urn]:
return self._get_upstream_dataset_urn()
+ @abstractmethod
+ def get_sql_query(self) -> str:
+ pass
+
+
+class DirectQueryUpstreamSource(SqlBasedDerivedViewUpstream):
+ """
+ Pattern 7 as per view-context documentation
+ """
+
+ def get_sql_query(self) -> str:
+ return self.view_context.datahub_transformed_sql_table_name()
+
+
+class DerivedQueryUpstreamSource(SqlBasedDerivedViewUpstream):
+ """
+ Pattern 4 as per view-context documentation
+ """
+
+ def get_sql_query(self) -> str:
+ return self.view_context.datahub_transformed_sql()
+
class NativeDerivedViewUpstream(AbstractViewUpstream):
"""
@@ -611,6 +633,7 @@ def create_view_upstream(
ctx: PipelineContext,
reporter: LookMLSourceReport,
) -> AbstractViewUpstream:
+
if view_context.is_regular_case():
return RegularViewUpstream(
view_context=view_context,
@@ -629,11 +652,23 @@ def create_view_upstream(
looker_view_id_cache=looker_view_id_cache,
)
- if (
- view_context.is_sql_based_derived_case()
- or view_context.is_sql_based_derived_view_without_fields_case()
+ if any(
+ [
+ view_context.is_sql_based_derived_case(),
+ view_context.is_sql_based_derived_view_without_fields_case(),
+ ]
):
- return SqlBasedDerivedViewUpstream(
+
+ return DerivedQueryUpstreamSource(
+ view_context=view_context,
+ config=config,
+ reporter=reporter,
+ ctx=ctx,
+ looker_view_id_cache=looker_view_id_cache,
+ )
+
+ if view_context.is_direct_sql_query_case():
+ return DirectQueryUpstreamSource(
view_context=view_context,
config=config,
reporter=reporter,
@@ -651,9 +686,9 @@ def create_view_upstream(
)
reporter.report_warning(
- title="Implementation Not Found",
+ title="ViewUpstream Implementation Not Found",
message="No implementation found to resolve upstream of the view",
- context=view_context.view_file_name(),
+        context=f"view_name={view_context.name()}, view_file_name={view_context.view_file_name()}",
)
return EmptyImplementation(
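Net effect of the refactor above: `SqlBasedDerivedViewUpstream` becomes an abstract template, and the two new subclasses differ only in where the SQL text comes from. A simplified standalone sketch of that shape (the `view_context` parameter is a stand-in for the real `ViewContext`):

```python
from abc import ABC, abstractmethod


class SqlBasedUpstream(ABC):
    """Shared SQL-lineage logic; subclasses only supply the query text."""

    def __init__(self, view_context) -> None:
        self.view_context = view_context

    def parse_lineage(self) -> str:
        # The real implementation feeds this into create_lineage_sql_parsed_result().
        return self.get_sql_query()

    @abstractmethod
    def get_sql_query(self) -> str:
        ...


class DirectQuerySource(SqlBasedUpstream):
    """Pattern 7: the query is embedded directly in sql_table_name."""

    def get_sql_query(self) -> str:
        return self.view_context.datahub_transformed_sql_table_name()


class DerivedQuerySource(SqlBasedUpstream):
    """Pattern 4: the query comes from derived_table.sql."""

    def get_sql_query(self) -> str:
        return self.view_context.datahub_transformed_sql()
```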
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json
index 76c8f04e8447a..9c0363e0892f0 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json
@@ -1,22 +1,4 @@
[
-{
- "entityType": "dataPlatformInstance",
- "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
- "changeType": "UPSERT",
- "aspectName": "dataPlatformInfo",
- "aspect": {
- "json": {
- "name": "ap-south-1",
- "type": "OTHERS",
- "datasetNameDelimiter": "."
- }
- },
- "systemMetadata": {
- "lastObserved": 1586847600000,
- "runId": "looker-test",
- "lastRunId": "no-run-id-provided"
- }
-},
{
"entityType": "container",
"entityUrn": "urn:li:container:e7fe6fc9c3ca70e78694dcc5dd9c05b7",
@@ -805,22 +787,6 @@
"lastRunId": "no-run-id-provided"
}
},
-{
- "entityType": "dataPlatformInstance",
- "entityUrn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1586847600000,
- "runId": "looker-test",
- "lastRunId": "no-run-id-provided"
- }
-},
{
"entityType": "tag",
"entityUrn": "urn:li:tag:Dimension",
diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml
index 6eb92d749c9f7..2cc6ae994d245 100644
--- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml
+++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml
@@ -6,6 +6,7 @@ include: "employee_total_income.view.lkml"
include: "top_10_employee_income_source.view.lkml"
include: "employee_tax_report.view.lkml"
include: "employee_salary_rating.view.lkml"
+include: "rent_as_employee_income_source.view.lkml"
explore: activity_logs {
}
@@ -23,4 +24,7 @@ explore: employee_tax_report {
}
explore: employee_salary_rating {
+}
+
+explore: rent_as_employee_income_source {
}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml
new file mode 100644
index 0000000000000..40b6e3642f3b3
--- /dev/null
+++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/rent_as_employee_income_source.view.lkml
@@ -0,0 +1,27 @@
+view: rent_as_employee_income_source {
+ sql_table_name: (
+ SELECT id,
+ name,
+ source
+ FROM ${employee_income_source.SQL_TABLE_NAME}
+ WHERE source = "RENT"
+ ORDER BY source desc
+ LIMIT 10
+ );;
+
+
+ dimension: id {
+ type: number
+ sql: ${TABLE}.id ;;
+ }
+
+ dimension: name {
+ type: string
+ sql: ${TABLE}.name ;;
+ }
+
+ dimension: source {
+ type: string
+ sql: ${TABLE}.source ;;
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json
index d12ced5e42506..2e55971b65bd4 100644
--- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json
+++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json
@@ -1580,6 +1580,254 @@
"lastRunId": "no-run-id-provided"
}
},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "view: rent_as_employee_income_source {\n sql_table_name: (\n SELECT id,\n name,\n source\n FROM ${employee_income_source.SQL_TABLE_NAME}\n WHERE source = \"RENT\"\n ORDER BY source desc\n LIMIT 10\n );;\n\n\n dimension: id {\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name ;;\n }\n\n dimension: source {\n type: string\n sql: ${TABLE}.source ;;\n }\n}",
+ "viewLanguage": "lookml"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.BrowsePaths": {
+ "paths": [
+ "/Develop/lkml_samples/"
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1586847600000,
+ "actor": "urn:li:corpuser:datahub"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),name)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),name)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_income_source,PROD),source)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD),source)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "rent_as_employee_income_source",
+ "platform": "urn:li:dataPlatform:looker",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.OtherSchema": {
+ "rawSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "id",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "number",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "name",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "source",
+ "nullable": false,
+ "description": "",
+ "label": "",
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "globalTags": {
+ "tags": [
+ {
+ "tag": "urn:li:tag:Dimension"
+ }
+ ]
+ },
+ "isPartOfKey": false
+ }
+ ],
+ "primaryKeys": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "looker.file.path": "rent_as_employee_income_source.view.lkml",
+ "looker.model": "data"
+ },
+ "name": "rent_as_employee_income_source",
+ "tags": []
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.rent_as_employee_income_source,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Develop"
+ },
+ {
+ "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
+ "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "lookml-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
{
"entityType": "tag",
"entityUrn": "urn:li:tag:Dimension",
From 40e61f9d6e96cc2c741be654ca9f7adbc868e9c7 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Wed, 7 Aug 2024 13:12:02 +0900
Subject: [PATCH 03/72] docs: update graphql docs on forms & structured
properties (#11100)
---
docs/api/tutorials/forms.md | 134 +++++++++++++++++-
docs/api/tutorials/structured-properties.md | 117 ++++++++++++++-
.../feature-guides/documentation-forms.md | 2 +-
3 files changed, 244 insertions(+), 9 deletions(-)
diff --git a/docs/api/tutorials/forms.md b/docs/api/tutorials/forms.md
index 3f28353595be7..eb555910f18eb 100644
--- a/docs/api/tutorials/forms.md
+++ b/docs/api/tutorials/forms.md
@@ -9,16 +9,16 @@ Documentation Forms are a way for end-users to fill out all mandatory attributes
Learn more about forms in the [Documentation Forms Feature Guide](../../../docs/features/feature-guides/documentation-forms.md).
-
### Goal Of This Guide
-This guide will show you how to create and read forms.
+This guide will show you how to:
+- Create, update, read, and delete a form
+- Assign a form to entities and remove it from them
## Prerequisites
For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
-
@@ -29,14 +29,45 @@ Connect to your instance via [init](https://datahubproject.io/docs/cli/#init):
2. Set the server to your sandbox instance, `https://{your-instance-address}/gms`
3. Set the token to your access token
-
-
## Create a Form
+
+
+```graphql
+mutation createForm {
+ createForm(
+ input: {
+ id: "metadataInitiative2024",
+ name: "Metadata Initiative 2024",
+ description: "How we want to ensure the most important data assets in our organization have all of the most important and expected pieces of metadata filled out",
+ type: VERIFICATION,
+ prompts: [
+ {
+ id: "123",
+ title: "retentionTime",
+ description: "Apply Retention Time structured property to form",
+ type: STRUCTURED_PROPERTY,
+ structuredPropertyParams: {
+ urn: "urn:li:structuredProperty:retentionTime"
+ }
+ }
+ ],
+ actors: {
+ users: ["urn:li:corpuser:jane@email.com", "urn:li:corpuser:john@email.com"],
+ groups: ["urn:li:corpGroup:team@email.com"]
+ }
+ }
+ ) {
+ urn
+ }
+}
+```
+
+
Create a yaml file representing the forms you’d like to load.
@@ -111,8 +142,42 @@ If successful, you should see `Created form urn:li:form:...`
-## Read Property Definition
+## Update Form
+
+
+
+```graphql
+mutation updateForm {
+ updateForm(
+ input: {
+ urn: "urn:li:form:metadataInitiative2024",
+ name: "Metadata Initiative 2024",
+ description: "How we want to ensure the most important data assets in our organization have all of the most important and expected pieces of metadata filled out",
+ type: VERIFICATION,
+ promptsToAdd: [
+ {
+ id: "456",
+ title: "deprecationDate",
+ description: "Deprecation date for dataset",
+ type: STRUCTURED_PROPERTY,
+ structuredPropertyParams: {
+ urn: "urn:li:structuredProperty:deprecationDate"
+ }
+ }
+ ]
+ promptsToRemove: ["123"]
+ }
+ ) {
+ urn
+ }
+}
+```
+
+
+
+
+## Read Form
@@ -146,3 +211,60 @@ If successful, you should see metadata about your form returned like below.
+
+## Delete Form
+
+
+
+
+```graphql
+mutation deleteForm {
+ deleteForm(
+ input: {
+ urn: "urn:li:form:metadataInitiative2024"
+ }
+ )
+}
+```
+
+
+
+## Assign Form to Entities
+
+To assign a form to a given list of entities:
+
+
+
+
+```graphql
+mutation batchAssignForm {
+ batchAssignForm(
+ input: {
+ formUrn: "urn:li:form:myform",
+ entityUrns: ["urn:li:dataset:mydataset1", "urn:li:dataset:mydataset2"]
+ }
+ )
+}
+```
+
+
+
+## Remove Form from Entities
+
+To remove a form from a given list of entities:
+
+
+
+
+```graphql
+mutation batchRemoveForm {
+ batchRemoveForm(
+ input: {
+ formUrn: "urn:li:form:myform",
+ entityUrns: ["urn:li:dataset:mydataset1", "urn:li:dataset:mydataset2"]
+ }
+ )
+}
+```
+
+
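The form mutations documented above can be issued with any GraphQL client. A minimal sketch using Python's `requests`, mirroring how this repository's smoke tests call the frontend GraphQL endpoint (the host and token are placeholders for your own instance):

```python
import requests

DATAHUB_URL = "http://localhost:9002"  # assumption: quickstart frontend address
TOKEN = "<your-access-token>"

mutation = """
mutation batchAssignForm {
  batchAssignForm(
    input: {
      formUrn: "urn:li:form:myform",
      entityUrns: ["urn:li:dataset:mydataset1", "urn:li:dataset:mydataset2"]
    }
  )
}
"""

response = requests.post(
    f"{DATAHUB_URL}/api/v2/graphql",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"query": mutation},
)
response.raise_for_status()
print(response.json())
```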
diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md
index c56a2848638fc..6f6c6541554d9 100644
--- a/docs/api/tutorials/structured-properties.md
+++ b/docs/api/tutorials/structured-properties.md
@@ -56,7 +56,33 @@ Requirements for OpenAPI are:
The following code will create a structured property `io.acryl.privacy.retentionTime`.
-
+
+
+```graphql
+mutation createStructuredProperty {
+ createStructuredProperty(
+ input: {
+ id: "retentionTime",
+      qualifiedName: "retentionTime",
+ displayName: "Retention Time",
+ description: "Retention Time is used to figure out how long to retain records in a dataset",
+ valueType: "urn:li:dataType:number",
+ allowedValues: [
+ {numberValue: 30, description: "30 days, usually reserved for datasets that are ephemeral and contain pii"},
+        {numberValue: 90, description: "Use this for datasets that drive monthly reporting but contain pii"},
+        {numberValue: 365, description: "Use this for non-sensitive data that can be retained for longer"}
+ ],
+ cardinality: SINGLE,
+ entityTypes: ["urn:li:entityType:dataset", "urn:li:entityType:dataFlow"],
+ }
+ ) {
+ urn
+ }
+}
+```
+
+
+
Create a yaml file representing the properties you’d like to load.
For example, below file represents a property `io.acryl.privacy.retentionTime`. You can see the full example [here](https://github.com/datahub-project/datahub/blob/example-yaml-sp/metadata-ingestion/examples/structured_properties/struct_props.yaml).
@@ -355,7 +381,37 @@ Example Response:
This action will set/replace all structured properties on the entity. See PATCH operations to add/remove a single property.
-
+
+
+```graphql
+mutation upsertStructuredProperties {
+ upsertStructuredProperties(
+ input: {
+ assetUrn: "urn:li:mydataset1",
+ structuredPropertyInputParams: [
+ {
+ structuredPropertyUrn: "urn:li:structuredProperty:mystructuredproperty",
+ values: [
+ {
+ stringValue: "123"
+ }
+ ]
+ }
+ ]
+ }
+ ) {
+ properties {
+ structuredProperty {
+ urn
+ }
+ }
+ }
+}
+
+```
+
+
+
You can set structured properties to a dataset by creating a dataset yaml file with structured properties. For example, below is a dataset yaml file with structured properties in both the field and dataset level.
@@ -466,6 +522,31 @@ Or you can run the following command to view the properties associated with the
datahub dataset get --urn {urn}
```
+## Remove Structured Properties From a Dataset
+
+To remove one or more structured properties from a dataset:
+
+
+
+
+```graphql
+mutation removeStructuredProperties {
+ removeStructuredProperties(
+ input: {
+ assetUrn: "urn:li:mydataset1",
+ structuredPropertyUrns: ["urn:li:structuredProperty:mystructuredproperty"]
+ }
+ ) {
+ properties {
+ structuredProperty {urn}
+ }
+ }
+}
+```
+
+
+
+
## Patch Structured Property Value
This section will show you how to patch a structured property value - either by removing, adding, or upserting a single property.
@@ -780,6 +861,38 @@ You can see that the first property has been removed and the second property is
In this example, we'll add the property back with a different value, preserving the existing property.
+
+
+```graphql
+mutation updateStructuredProperty {
+ updateStructuredProperty(
+ input: {
+ urn: "urn:li:structuredProperty:retentionTime",
+ displayName: "Retention Time",
+ description: "Retention Time is used to figure out how long to retain records in a dataset",
+ newAllowedValues: [
+ {
+ numberValue: 30,
+ description: "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ numberValue: 90,
+ description: "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ numberValue: 365,
+ description: "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
+ }
+ ) {
+ urn
+ }
+}
+
+```
+
+
```shell
diff --git a/docs/features/feature-guides/documentation-forms.md b/docs/features/feature-guides/documentation-forms.md
index b007892e66094..2edeb8ce302d7 100644
--- a/docs/features/feature-guides/documentation-forms.md
+++ b/docs/features/feature-guides/documentation-forms.md
@@ -101,7 +101,7 @@ You sure can! Please keep in mind that an Asset will only be considered Document
### API Tutorials
-- [Create a Documentation Form](../../../docs/api/tutorials/forms.md)
+- [API Guides on Documentation Form](../../../docs/api/tutorials/forms.md)
:::note
You must create a Structured Property before including it in a Documentation Form.
From 900c25986cb36ca61d723426b4b207a2a67b93aa Mon Sep 17 00:00:00 2001
From: Kunal-kankriya <127090035+Kunal-kankriya@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:54:58 +0530
Subject: [PATCH 04/72] test(search): search openAPI v3 test (#11049)
---
smoke-test/tests/read_only/test_search.py | 75 +++++++++++++++++------
1 file changed, 57 insertions(+), 18 deletions(-)
diff --git a/smoke-test/tests/read_only/test_search.py b/smoke-test/tests/read_only/test_search.py
index 90385c5228bc1..3b9635f3da2cd 100644
--- a/smoke-test/tests/read_only/test_search.py
+++ b/smoke-test/tests/read_only/test_search.py
@@ -1,10 +1,13 @@
import pytest
+import requests
from tests.test_result_msg import add_datahub_stats
-from tests.utils import get_frontend_session, get_frontend_url
+from tests.utils import get_frontend_session, get_frontend_url, get_gms_url
-restli_default_headers = {
- "X-RestLi-Protocol-Version": "2.0.0",
+BASE_URL_V3 = f"{get_gms_url()}/openapi/v3"
+
+default_headers = {
+ "Content-Type": "application/json",
}
ENTITY_TO_MAP = {
@@ -59,16 +62,8 @@ def _get_search_result(frontend_session, entity: str):
("chart", "chart"),
("dataset", "dataset"),
("dashboard", "dashboard"),
- (
- # Task
- "dataJob",
- "dataJob",
- ),
- (
- # Pipeline
- "dataFlow",
- "dataFlow",
- ),
+ ("dataJob", "dataJob"),
+ ("dataFlow", "dataFlow"),
("container", "container"),
("tag", "tag"),
("corpUser", "corpUser"),
@@ -78,11 +73,7 @@ def _get_search_result(frontend_session, entity: str):
("mlPrimaryKey", "mlPrimaryKey"),
("corpGroup", "corpGroup"),
("mlFeatureTable", "mlFeatureTable"),
- (
- # Term group
- "glossaryNode",
- "glossaryNode",
- ),
+ ("glossaryNode", "glossaryNode"),
("mlModel", "mlModel"),
],
)
@@ -112,8 +103,56 @@ def test_search_works(entity_type, api_name):
""",
"variables": {"input": first_urn},
}
+
response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json)
response.raise_for_status()
res_data = response.json()
assert res_data["data"], f"res_data was {res_data}"
assert res_data["data"][api_name]["urn"] == first_urn, f"res_data was {res_data}"
+
+
+@pytest.mark.read_only
+@pytest.mark.parametrize(
+ "entity_type",
+ [
+ "chart",
+ "dataset",
+ "dashboard",
+ "dataJob",
+ "dataFlow",
+ "container",
+ "tag",
+ "corpUser",
+ "mlFeature",
+ "glossaryTerm",
+ "domain",
+ "mlPrimaryKey",
+ "corpGroup",
+ "mlFeatureTable",
+ "glossaryNode",
+ "mlModel",
+ ],
+)
+def test_openapi_v3_entity(entity_type):
+ frontend_session = get_frontend_session()
+ search_result = _get_search_result(frontend_session, entity_type)
+ num_entities = search_result["total"]
+ if num_entities == 0:
+ print(f"[WARN] No results for {entity_type}")
+ return
+ entities = search_result["searchResults"]
+
+ first_urn = entities[0]["entity"]["urn"]
+
+ session = requests.Session()
+ url = f"{BASE_URL_V3}/entity/{entity_type}/{first_urn}"
+ response = session.get(url, headers=default_headers)
+ response.raise_for_status()
+ actual_data = response.json()
+ print(f"Entity Data for URN {first_urn}: {actual_data}")
+
+ expected_data = {"urn": first_urn}
+
+ assert (
+ actual_data["urn"] == expected_data["urn"]
+ ), f"Mismatch: expected {expected_data}, got {actual_data}"
From edb0f19f1b594d662ed12584fe07e6fd348e8f12 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 7 Aug 2024 13:50:08 -0700
Subject: [PATCH 05/72] fix(ingest/tableau): prevent empty site content urls
(#11057)
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
.../src/datahub/ingestion/source/tableau.py | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
index 9cde3b1f8d3a0..510cb6c96d1f2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
@@ -757,6 +757,12 @@ def _re_authenticate(self):
] = self.config.get_tableau_auth(self.site.content_url)
self.server.auth.sign_in(tableau_auth)
+ @property
+ def site_content_url(self) -> Optional[str]:
+ if self.site and self.site.content_url:
+ return self.site.content_url
+ return None
+
def _populate_usage_stat_registry(self) -> None:
if self.server is None:
return
@@ -2524,7 +2530,9 @@ def emit_sheets_as_charts(
last_modified = self.get_last_modified(creator, created_at, updated_at)
if sheet.get(c.PATH):
- site_part = f"/site/{self.site.content_url}" if self.site else ""
+ site_part = (
+ f"/site/{self.site_content_url}" if self.site_content_url else ""
+ )
sheet_external_url = (
f"{self.config.connect_uri}/#{site_part}/views/{sheet.get(c.PATH)}"
)
@@ -2535,7 +2543,7 @@ def emit_sheets_as_charts(
and sheet[c.CONTAINED_IN_DASHBOARDS][0].get(c.PATH)
):
# sheet contained in dashboard
- site_part = f"/t/{self.site.content_url}" if self.site else ""
+ site_part = f"/t/{self.site_content_url}" if self.site_content_url else ""
dashboard_path = sheet[c.CONTAINED_IN_DASHBOARDS][0][c.PATH]
sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{quote(sheet.get(c.NAME, ''), safe='')}"
else:
@@ -2667,7 +2675,7 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni
else None
)
- site_part = f"/site/{self.site.content_url}" if self.site else ""
+ site_part = f"/site/{self.site_content_url}" if self.site_content_url else ""
workbook_uri = workbook.get("uri")
workbook_part = (
workbook_uri[workbook_uri.index("/workbooks/") :] if workbook_uri else None
@@ -2826,7 +2834,7 @@ def emit_dashboard(
updated_at = dashboard.get(c.UPDATED_AT, datetime.now())
last_modified = self.get_last_modified(creator, created_at, updated_at)
- site_part = f"/site/{self.site.content_url}" if self.site else ""
+ site_part = f"/site/{self.site_content_url}" if self.site_content_url else ""
dashboard_external_url = (
f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get(c.PATH, '')}"
)
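The new `site_content_url` property guards both `self.site` and `self.site.content_url`, so URL fragments like `/site/` or `/site/None` can no longer be produced. A minimal sketch of the guard in isolation (the `Site` stand-in is illustrative, not Tableau's class):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Site:
    content_url: Optional[str] = None


def site_part(site: Optional[Site]) -> str:
    # Only build the fragment when a non-empty content URL exists.
    content_url = site.content_url if site and site.content_url else None
    return f"/site/{content_url}" if content_url else ""


assert site_part(Site("acme")) == "/site/acme"
assert site_part(Site("")) == ""  # empty content url -> no fragment
assert site_part(None) == ""
```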
From c226883097d01daf2fcb18689aee72ac5bf9f1a0 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Wed, 7 Aug 2024 15:53:36 -0500
Subject: [PATCH 06/72] feat(entity-client): implement client batch interface
(#11106)
---
.../entity/ebean/batch/AspectsBatchImpl.java | 2 +-
.../entity/ebean/batch/ProposedItem.java | 8 ++-
.../metadata/client/JavaEntityClient.java | 58 ++++++++++-----
.../linkedin/entity/client/EntityClient.java | 44 +++++-------
.../entity/client/RestliEntityClient.java | 27 +++----
.../tests/privileges/test_privileges.py | 8 +++
smoke-test/tests/privileges/utils.py | 70 ++++++++++++++++++-
7 files changed, 157 insertions(+), 60 deletions(-)
diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java
index a23f6ab175046..7a1af12272ac5 100644
--- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java
+++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java
@@ -123,7 +123,7 @@ public AspectsBatchImplBuilder one(BatchItem data, RetrieverContext retrieverCon
}
public AspectsBatchImplBuilder mcps(
- List mcps,
+ Collection mcps,
AuditStamp auditStamp,
RetrieverContext retrieverContext) {
diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java
index 452ed39ddf317..132a731d278af 100644
--- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java
+++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java
@@ -7,6 +7,7 @@
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
+import com.linkedin.metadata.utils.EntityKeyUtils;
import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.mxe.SystemMetadata;
@@ -63,7 +64,12 @@ public RecordTemplate getRecordTemplate() {
@Nonnull
@Override
public Urn getUrn() {
- return metadataChangeProposal.getEntityUrn();
+ Urn urn = metadataChangeProposal.getEntityUrn();
+ if (urn == null) {
+ urn =
+ EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec());
+ }
+ return urn;
}
@Nullable
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
index 337288ab59c60..f8370c9efe3e6 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
@@ -12,7 +12,6 @@
import com.linkedin.common.AuditStamp;
import com.linkedin.common.VersionedUrn;
import com.linkedin.common.urn.Urn;
-import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.DataMap;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.data.template.StringArray;
@@ -24,6 +23,7 @@
import com.linkedin.metadata.aspect.EnvelopedAspectArray;
import com.linkedin.metadata.aspect.VersionedAspect;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
+import com.linkedin.metadata.aspect.batch.BatchItem;
import com.linkedin.metadata.browse.BrowseResult;
import com.linkedin.metadata.browse.BrowseResultV2;
import com.linkedin.metadata.entity.DeleteEntityService;
@@ -48,6 +48,7 @@
import com.linkedin.metadata.search.client.CachingEntitySearchService;
import com.linkedin.metadata.service.RollbackService;
import com.linkedin.metadata.timeseries.TimeseriesAspectService;
+import com.linkedin.metadata.utils.AuditStampUtils;
import com.linkedin.metadata.utils.metrics.MetricUtils;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.mxe.PlatformEvent;
@@ -60,6 +61,7 @@
import java.net.URISyntaxException;
import java.time.Clock;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -68,6 +70,7 @@
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
+import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.RequiredArgsConstructor;
@@ -738,35 +741,54 @@ public List getTimeseriesAspectValues(
return response.getValues();
}
- // TODO: Factor out ingest logic into a util that can be accessed by the java client and the
- // resource
@Override
- public String ingestProposal(
+ @Nonnull
+ public List batchIngestProposals(
@Nonnull OperationContext opContext,
- @Nonnull final MetadataChangeProposal metadataChangeProposal,
- final boolean async)
- throws RemoteInvocationException {
+ @Nonnull Collection metadataChangeProposals,
+ boolean async) {
String actorUrnStr =
opContext.getSessionAuthentication().getActor() != null
? opContext.getSessionAuthentication().getActor().toUrnStr()
: Constants.UNKNOWN_ACTOR;
- final AuditStamp auditStamp =
- new AuditStamp().setTime(_clock.millis()).setActor(UrnUtils.getUrn(actorUrnStr));
+ final AuditStamp auditStamp = AuditStampUtils.createAuditStamp(actorUrnStr);
AspectsBatch batch =
AspectsBatchImpl.builder()
- .mcps(
- List.of(metadataChangeProposal), auditStamp, opContext.getRetrieverContext().get())
+ .mcps(metadataChangeProposals, auditStamp, opContext.getRetrieverContext().get())
.build();
- Optional one =
- entityService.ingestProposal(opContext, batch, async).stream().findFirst();
+ Map> resultMap =
+ entityService.ingestProposal(opContext, batch, async).stream()
+ .collect(Collectors.groupingBy(IngestResult::getRequest));
+
+ // Update runIds
+ batch.getItems().stream()
+ .filter(resultMap::containsKey)
+ .forEach(
+ requestItem -> {
+ List results = resultMap.get(requestItem);
+ Optional resultUrn =
+ results.stream().map(IngestResult::getUrn).filter(Objects::nonNull).findFirst();
+ resultUrn.ifPresent(
+ urn -> tryIndexRunId(opContext, urn, requestItem.getSystemMetadata()));
+ });
- Urn urn = one.map(IngestResult::getUrn).orElse(metadataChangeProposal.getEntityUrn());
- if (one.isPresent()) {
- tryIndexRunId(opContext, urn, metadataChangeProposal.getSystemMetadata());
- }
- return urn.toString();
+ // Preserve ordering
+ return batch.getItems().stream()
+ .map(
+ requestItem -> {
+ if (resultMap.containsKey(requestItem)) {
+ List results = resultMap.get(requestItem);
+ return results.stream()
+ .filter(r -> r.getUrn() != null)
+ .findFirst()
+ .map(r -> r.getUrn().toString())
+ .orElse(null);
+ }
+ return null;
+ })
+ .collect(Collectors.toList());
}
@SneakyThrows
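The ordering contract in the new `batchIngestProposals` (group ingest results by request item, then emit one slot per input proposal in the caller's order) can be restated in miniature. A Python sketch with hashable stand-ins for the Java batch items:

```python
from collections import defaultdict


def batch_results_in_request_order(items, ingest_results):
    # ingest_results: unordered (request_item, urn-or-None) pairs.
    by_request = defaultdict(list)
    for request_item, urn in ingest_results:
        by_request[request_item].append(urn)

    # Preserve the caller's ordering: one slot per input item,
    # taking the first non-null urn for each request.
    return [
        next((u for u in by_request.get(item, []) if u is not None), None)
        for item in items
    ]


items = ["a", "b", "c"]
results = [("b", "urn:b"), ("a", None), ("a", "urn:a")]
assert batch_results_in_request_order(items, results) == ["urn:a", "urn:b", None]
```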
diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java
index 8821143cde6cc..cb5c691d0cb61 100644
--- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java
+++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java
@@ -38,7 +38,6 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
-import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -519,27 +518,17 @@ default String ingestProposal(
return ingestProposal(opContext, metadataChangeProposal, false);
}
- String ingestProposal(
+ /**
+ * Ingest a MetadataChangeProposal event.
+ *
+ * @return the urn string ingested
+ */
+ default String ingestProposal(
@Nonnull OperationContext opContext,
@Nonnull final MetadataChangeProposal metadataChangeProposal,
final boolean async)
- throws RemoteInvocationException;
-
- @Deprecated
- default String wrappedIngestProposal(
- @Nonnull OperationContext opContext, @Nonnull MetadataChangeProposal metadataChangeProposal) {
- return wrappedIngestProposal(opContext, metadataChangeProposal, false);
- }
-
- default String wrappedIngestProposal(
- @Nonnull OperationContext opContext,
- @Nonnull MetadataChangeProposal metadataChangeProposal,
- final boolean async) {
- try {
- return ingestProposal(opContext, metadataChangeProposal, async);
- } catch (RemoteInvocationException e) {
- throw new RuntimeException(e);
- }
+ throws RemoteInvocationException {
+ return batchIngestProposals(opContext, List.of(metadataChangeProposal), async).get(0);
}
@Deprecated
@@ -550,15 +539,20 @@ default List batchIngestProposals(
return batchIngestProposals(opContext, metadataChangeProposals, false);
}
- default List batchIngestProposals(
+ /**
+ * Ingest a list of proposals in a batch.
+ *
+ * @param opContext operation context
+ * @param metadataChangeProposals list of proposals
+ * @param async async or sync ingestion path
+ * @return ingested urns
+ */
+ @Nonnull
+ List batchIngestProposals(
@Nonnull OperationContext opContext,
@Nonnull final Collection metadataChangeProposals,
final boolean async)
- throws RemoteInvocationException {
- return metadataChangeProposals.stream()
- .map(proposal -> wrappedIngestProposal(opContext, proposal, async))
- .collect(Collectors.toList());
- }
+ throws RemoteInvocationException;
@Deprecated
Optional getVersionedAspect(
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
index fe1ca571efea5..2a3ae5d006ae0 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
@@ -12,7 +12,7 @@
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.data.template.StringArray;
import com.linkedin.entity.AspectsDoGetTimeseriesAspectValuesRequestBuilder;
-import com.linkedin.entity.AspectsDoIngestProposalRequestBuilder;
+import com.linkedin.entity.AspectsDoIngestProposalBatchRequestBuilder;
import com.linkedin.entity.AspectsGetRequestBuilder;
import com.linkedin.entity.AspectsRequestBuilders;
import com.linkedin.entity.EntitiesBatchGetRequestBuilder;
@@ -67,6 +67,7 @@
import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchResult;
import com.linkedin.mxe.MetadataChangeProposal;
+import com.linkedin.mxe.MetadataChangeProposalArray;
import com.linkedin.mxe.PlatformEvent;
import com.linkedin.mxe.SystemMetadata;
import com.linkedin.parseq.retry.backoff.BackoffPolicy;
@@ -1047,23 +1048,23 @@ public List getTimeseriesAspectValues(
.getValues();
}
- /**
- * Ingest a MetadataChangeProposal event.
- *
- * @return the urn string ingested
- */
+ @Nonnull
@Override
- public String ingestProposal(
+ public List batchIngestProposals(
@Nonnull OperationContext opContext,
- @Nonnull final MetadataChangeProposal metadataChangeProposal,
- final boolean async)
+ @Nonnull Collection metadataChangeProposals,
+ boolean async)
throws RemoteInvocationException {
- final AspectsDoIngestProposalRequestBuilder requestBuilder =
+ final AspectsDoIngestProposalBatchRequestBuilder requestBuilder =
ASPECTS_REQUEST_BUILDERS
- .actionIngestProposal()
- .proposalParam(metadataChangeProposal)
+ .actionIngestProposalBatch()
+ .proposalsParam(new MetadataChangeProposalArray(metadataChangeProposals))
.asyncParam(String.valueOf(async));
- return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity();
+ String result =
+ sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity();
+ return metadataChangeProposals.stream()
+ .map(proposal -> "success".equals(result) ? proposal.getEntityUrn().toString() : null)
+ .collect(Collectors.toList());
}
@Override
diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py
index c9a0b62159314..bce7b8a238c38 100644
--- a/smoke-test/tests/privileges/test_privileges.py
+++ b/smoke-test/tests/privileges/test_privileges.py
@@ -4,11 +4,13 @@
from tests.privileges.utils import (
assign_role,
assign_user_to_group,
+    clear_policies,
create_group,
create_user,
create_user_policy,
remove_group,
remove_policy,
+ remove_secret,
remove_user,
set_base_platform_privileges_policy_status,
set_view_dataset_sensitive_info_policy_status,
@@ -65,6 +67,12 @@ def privileges_and_test_user_setup(admin_session):
# Remove test user
remove_user(admin_session, "urn:li:corpuser:user")
+ # Remove secret
+ remove_secret(admin_session, "urn:li:dataHubSecret:TestSecretName")
+
+ # Remove test policies
+    clear_policies(admin_session)
+
# Restore All users privileges
set_base_platform_privileges_policy_status("ACTIVE", admin_session)
set_view_dataset_sensitive_info_policy_status("ACTIVE", admin_session)
diff --git a/smoke-test/tests/privileges/utils.py b/smoke-test/tests/privileges/utils.py
index 1e58ec4085b70..72ad94a42a462 100644
--- a/smoke-test/tests/privileges/utils.py
+++ b/smoke-test/tests/privileges/utils.py
@@ -246,8 +246,8 @@ def create_user_policy(user_urn, privileges, session):
"variables": {
"input": {
"type": "PLATFORM",
- "name": "Policy Name",
- "description": "Policy Description",
+ "name": "Test Policy Name",
+ "description": "Test Policy Description",
"state": "ACTIVE",
"resources": {"filter": {"criteria": []}},
"privileges": privileges,
@@ -288,3 +288,69 @@ def remove_policy(urn, session):
assert res_data["data"]
assert res_data["data"]["deletePolicy"]
assert res_data["data"]["deletePolicy"] == urn
+
+
+def clear_policies(session):
+ list_policy_json = {
+ "query": """query listPolicies($input: ListPoliciesInput!) {
+ listPolicies(input: $input) {
+ start
+ count
+ total
+ policies {
+ urn
+ editable
+ name
+ description
+ __typename
+ }
+ __typename
+ }
+ }""",
+ "variables": {
+ "input": {
+ "count": 100,
+ "start": 0,
+ "orFilters": [
+ {
+ "and": [
+ {
+ "field": "state",
+ "values": ["ACTIVE"],
+ "condition": "EQUAL",
+ },
+ {
+ "field": "editable",
+ "values": ["true"],
+ "condition": "EQUAL",
+ },
+ ]
+ }
+ ],
+ }
+ },
+ }
+
+ response = session.post(
+ f"{get_frontend_url()}/api/v2/graphql", json=list_policy_json
+ )
+ response.raise_for_status()
+ res_data = response.json()
+
+ assert res_data
+ assert res_data["data"]
+ assert res_data["data"]["listPolicies"]
+ for policy in res_data["data"]["listPolicies"]["policies"]:
+ if "test" in policy["name"].lower() or "test" in policy["description"].lower():
+ remove_policy(policy["urn"], session)
+
+
+def remove_secret(session, urn):
+ remove_secret = {
+ "query": """mutation deleteSecret($urn: String!) {\n
+ deleteSecret(urn: $urn)\n}""",
+ "variables": {"urn": urn},
+ }
+
+ response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret)
+ response.raise_for_status()
From a25df8e6a0ab5c36605e674380721edc8f72e95f Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 7 Aug 2024 14:04:18 -0700
Subject: [PATCH 07/72] fix(snowflake): avoid reporting warnings/info for sys
tables (#11114)
---
.../src/datahub/ingestion/api/source.py | 2 +-
.../source/snowflake/snowflake_schema_gen.py | 2 +-
.../source/snowflake/snowflake_utils.py | 20 +++++++++++++------
3 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index a4de8b382430c..3dea3d36f41f1 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -47,7 +47,7 @@
logger = logging.getLogger(__name__)
-_MAX_CONTEXT_STRING_LENGTH = 300
+_MAX_CONTEXT_STRING_LENGTH = 1000
class SourceCapability(Enum):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
index 1d4a5b377da14..a64589bcfed02 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -440,7 +440,7 @@ def _process_schema(
yield from self._process_tag(tag)
if not snowflake_schema.views and not snowflake_schema.tables:
- self.structured_reporter.warning(
+ self.structured_reporter.info(
title="No tables/views found in schema",
message="If tables exist, please grant REFERENCES or SELECT permissions on them.",
context=f"{db_name}.{schema_name}",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
index a1878963d3798..0177d59ef6b21 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
@@ -127,6 +127,8 @@ def is_dataset_pattern_allowed(
SnowflakeObjectDomain.MATERIALIZED_VIEW,
):
return False
+ if _is_sys_table(dataset_name):
+ return False
if len(dataset_params) != 3:
self.structured_reporter.info(
@@ -176,6 +178,11 @@ def _combine_identifier_parts(
return f"{db_name}.{schema_name}.{table_name}"
+def _is_sys_table(table_name: str) -> bool:
+    # These often look like `SYS$_UNPIVOT_VIEW1737` or `sys$_pivot_view19`.
+ return table_name.lower().startswith("sys$")
+
+
# Qualified Object names from snowflake audit logs have quotes for for snowflake quoted identifiers,
# For example "test-database"."test-schema".test_table
# whereas we generate urns without quotes even for quoted identifiers for backward compatibility
@@ -186,12 +193,13 @@ def _cleanup_qualified_name(
) -> str:
name_parts = qualified_name.split(".")
if len(name_parts) != 3:
- structured_reporter.info(
- title="Unexpected dataset pattern",
- message="We failed to parse a Snowflake qualified name into its constituent parts. "
- "DB/schema/table filtering may not work as expected on these entities.",
- context=f"{qualified_name} has {len(name_parts)} parts",
- )
+ if not _is_sys_table(qualified_name):
+ structured_reporter.info(
+ title="Unexpected dataset pattern",
+ message="We failed to parse a Snowflake qualified name into its constituent parts. "
+ "DB/schema/table filtering may not work as expected on these entities.",
+ context=f"{qualified_name} has {len(name_parts)} parts",
+ )
return qualified_name.replace('"', "")
return _combine_identifier_parts(
db_name=name_parts[0].strip('"'),
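The sys-table guard is small enough to illustrate inline; this standalone restatement uses the examples from the comments above:

```python
def _is_sys_table(table_name: str) -> bool:
    # Snowflake-internal helper objects, e.g. SYS$_UNPIVOT_VIEW1737 or
    # sys$_pivot_view19, which should not be reported on.
    return table_name.lower().startswith("sys$")


assert _is_sys_table("SYS$_UNPIVOT_VIEW1737")
assert _is_sys_table("sys$_pivot_view19")
assert not _is_sys_table("analytics.public.orders")
```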
From d6e46b9bcf3b8e8b1e8719fb352f5837bf6b402c Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 7 Aug 2024 14:57:05 -0700
Subject: [PATCH 08/72] fix(ingest): downgrade column type mapping warning to
info (#11115)
---
.../datahub/ingestion/source/abs/source.py | 74 +----------------
.../ingestion/source/dbt/dbt_common.py | 7 +-
.../src/datahub/ingestion/source/s3/source.py | 79 +------------------
.../ingestion/source/sql/sql_common.py | 7 +-
4 files changed, 15 insertions(+), 152 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py
index 39ebd79c2e226..66f268799b2f1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py
@@ -8,29 +8,10 @@
from collections import OrderedDict
from datetime import datetime
from pathlib import PurePath
-from typing import Any, Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple
import smart_open.compression as so_compression
from more_itertools import peekable
-from pyspark.sql.types import (
- ArrayType,
- BinaryType,
- BooleanType,
- ByteType,
- DateType,
- DecimalType,
- DoubleType,
- FloatType,
- IntegerType,
- LongType,
- MapType,
- NullType,
- ShortType,
- StringType,
- StructField,
- StructType,
- TimestampType,
-)
from smart_open import open as smart_open
from datahub.emitter.mce_builder import (
@@ -48,7 +29,7 @@
platform_name,
support_status,
)
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.abs.config import DataLakeSourceConfig, PathSpec
from datahub.ingestion.source.abs.report import DataLakeSourceReport
@@ -72,22 +53,14 @@
StatefulIngestionSourceBase,
)
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
- BooleanTypeClass,
- BytesTypeClass,
- DateTypeClass,
- NullTypeClass,
- NumberTypeClass,
- RecordTypeClass,
SchemaField,
SchemaFieldDataType,
SchemaMetadata,
StringTypeClass,
- TimeTypeClass,
)
from datahub.metadata.schema_classes import (
DataPlatformInstanceClass,
DatasetPropertiesClass,
- MapTypeClass,
OperationClass,
OperationTypeClass,
OtherSchemaClass,
@@ -100,55 +73,12 @@
logging.getLogger("py4j").setLevel(logging.ERROR)
logger: logging.Logger = logging.getLogger(__name__)
-# for a list of all types, see https://spark.apache.org/docs/3.0.3/api/python/_modules/pyspark/sql/types.html
-_field_type_mapping = {
- NullType: NullTypeClass,
- StringType: StringTypeClass,
- BinaryType: BytesTypeClass,
- BooleanType: BooleanTypeClass,
- DateType: DateTypeClass,
- TimestampType: TimeTypeClass,
- DecimalType: NumberTypeClass,
- DoubleType: NumberTypeClass,
- FloatType: NumberTypeClass,
- ByteType: BytesTypeClass,
- IntegerType: NumberTypeClass,
- LongType: NumberTypeClass,
- ShortType: NumberTypeClass,
- ArrayType: NullTypeClass,
- MapType: MapTypeClass,
- StructField: RecordTypeClass,
- StructType: RecordTypeClass,
-}
PAGE_SIZE = 1000
# Hack to support the .gzip extension with smart_open.
so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"])
-def get_column_type(
- report: SourceReport, dataset_name: str, column_type: str
-) -> SchemaFieldDataType:
- """
- Maps known Spark types to datahub types
- """
- TypeClass: Any = None
-
- for field_type, type_class in _field_type_mapping.items():
- if isinstance(column_type, field_type):
- TypeClass = type_class
- break
-
- # if still not found, report the warning
- if TypeClass is None:
- report.report_warning(
- dataset_name, f"unable to map type {column_type} to metadata schema"
- )
- TypeClass = NullTypeClass
-
- return SchemaFieldDataType(type=TypeClass())
-
-
# config flags to emit telemetry for
config_options_to_report = [
"platform",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index ead86acc299ca..e2b5f8378732c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -849,8 +849,11 @@ def get_column_type(
# if still not found, report the warning
if TypeClass is None:
if column_type:
- report.report_warning(
- dataset_name, f"unable to map type {column_type} to metadata schema"
+ report.info(
+ title="Unable to map column types to DataHub types",
+ message="Got an unexpected column type. The column's parsed field type will not be populated.",
+ context=f"{dataset_name} - {column_type}",
+ log=False,
)
TypeClass = NullTypeClass
diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
index b8c7fd5aa88fc..f81d06c35e3b0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
@@ -8,32 +8,13 @@
from collections import OrderedDict
from datetime import datetime
from pathlib import PurePath
-from typing import Any, Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple
import smart_open.compression as so_compression
from more_itertools import peekable
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.dataframe import DataFrame
-from pyspark.sql.types import (
- ArrayType,
- BinaryType,
- BooleanType,
- ByteType,
- DateType,
- DecimalType,
- DoubleType,
- FloatType,
- IntegerType,
- LongType,
- MapType,
- NullType,
- ShortType,
- StringType,
- StructField,
- StructType,
- TimestampType,
-)
from pyspark.sql.utils import AnalysisException
from smart_open import open as smart_open
@@ -52,7 +33,7 @@
platform_name,
support_status,
)
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.s3_boto_utils import get_s3_tags, list_folders
from datahub.ingestion.source.aws.s3_util import (
@@ -72,22 +53,13 @@
StatefulIngestionSourceBase,
)
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
- BooleanTypeClass,
- BytesTypeClass,
- DateTypeClass,
- NullTypeClass,
- NumberTypeClass,
- RecordTypeClass,
SchemaField,
- SchemaFieldDataType,
SchemaMetadata,
StringTypeClass,
- TimeTypeClass,
)
from datahub.metadata.schema_classes import (
DataPlatformInstanceClass,
DatasetPropertiesClass,
- MapTypeClass,
OperationClass,
OperationTypeClass,
OtherSchemaClass,
@@ -101,55 +73,12 @@
logging.getLogger("py4j").setLevel(logging.ERROR)
logger: logging.Logger = logging.getLogger(__name__)
-# for a list of all types, see https://spark.apache.org/docs/3.0.3/api/python/_modules/pyspark/sql/types.html
-_field_type_mapping = {
- NullType: NullTypeClass,
- StringType: StringTypeClass,
- BinaryType: BytesTypeClass,
- BooleanType: BooleanTypeClass,
- DateType: DateTypeClass,
- TimestampType: TimeTypeClass,
- DecimalType: NumberTypeClass,
- DoubleType: NumberTypeClass,
- FloatType: NumberTypeClass,
- ByteType: BytesTypeClass,
- IntegerType: NumberTypeClass,
- LongType: NumberTypeClass,
- ShortType: NumberTypeClass,
- ArrayType: NullTypeClass,
- MapType: MapTypeClass,
- StructField: RecordTypeClass,
- StructType: RecordTypeClass,
-}
PAGE_SIZE = 1000
# Hack to support the .gzip extension with smart_open.
so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"])
-def get_column_type(
- report: SourceReport, dataset_name: str, column_type: str
-) -> SchemaFieldDataType:
- """
- Maps known Spark types to datahub types
- """
- TypeClass: Any = None
-
- for field_type, type_class in _field_type_mapping.items():
- if isinstance(column_type, field_type):
- TypeClass = type_class
- break
-
- # if still not found, report the warning
- if TypeClass is None:
- report.report_warning(
- dataset_name, f"unable to map type {column_type} to metadata schema"
- )
- TypeClass = NullTypeClass
-
- return SchemaFieldDataType(type=TypeClass())
-
-
# config flags to emit telemetry for
config_options_to_report = [
"platform",
@@ -490,9 +419,7 @@ def add_partition_columns_to_schema(
if not is_fieldpath_v2
else f"[version=2.0].[type=string].{partition_key}",
nativeDataType="string",
- type=SchemaFieldDataType(StringTypeClass())
- if not is_fieldpath_v2
- else SchemaFieldDataTypeClass(type=StringTypeClass()),
+ type=SchemaFieldDataTypeClass(StringTypeClass()),
isPartitioningKey=True,
nullable=True,
recursive=False,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index 1fa308eae6b76..2ab1e6bb41af1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -263,8 +263,11 @@ def get_column_type(
break
if TypeClass is None:
- sql_report.report_warning(
- dataset_name, f"unable to map type {column_type!r} to metadata schema"
+ sql_report.info(
+ title="Unable to map column types to DataHub types",
+ message="Got an unexpected column type. The column's parsed field type will not be populated.",
+ context=f"{dataset_name} - {column_type!r}",
+ log=False,
)
TypeClass = NullTypeClass
From e08412e513215405902a61713895bbefa2ed624e Mon Sep 17 00:00:00 2001
From: Ajoy Majumdar
Date: Thu, 8 Aug 2024 08:00:38 -0700
Subject: [PATCH 09/72] feat(api): add AuditStamp to the V3 API entity/aspect
response (#11118)
---
.../openapi/v3/models/AspectItem.java | 15 +++++++
.../openapi/v3/models/GenericAspectV3.java | 1 +
.../openapi/v3/models/GenericEntityV3.java | 19 ++++----
.../openapi/v3/OpenAPIV3Generator.java | 26 +++++++----
.../v3/controller/EntityController.java | 44 ++++++++++++++-----
5 files changed, 77 insertions(+), 28 deletions(-)
create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java
diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java
new file mode 100644
index 0000000000000..ec5dff7817231
--- /dev/null
+++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/AspectItem.java
@@ -0,0 +1,15 @@
+package io.datahubproject.openapi.v3.models;
+
+import com.linkedin.common.AuditStamp;
+import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.mxe.SystemMetadata;
+import lombok.Builder;
+import lombok.Value;
+
+@Builder(toBuilder = true)
+@Value
+public class AspectItem {
+ RecordTemplate aspect;
+ SystemMetadata systemMetadata;
+ AuditStamp auditStamp;
+}
diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java
index 4db2c3288d154..70bf2182c29f4 100644
--- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java
+++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java
@@ -19,4 +19,5 @@ public class GenericAspectV3 implements GenericAspect {
@Nonnull Map value;
@Nullable Map systemMetadata;
@Nullable Map headers;
+ @Nullable Map auditStamp;
}
diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java
index 3af3b25028fad..54d6ac2c1736f 100644
--- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java
+++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java
@@ -5,9 +5,6 @@
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.linkedin.common.urn.Urn;
-import com.linkedin.data.template.RecordTemplate;
-import com.linkedin.mxe.SystemMetadata;
-import com.linkedin.util.Pair;
import io.datahubproject.openapi.models.GenericEntity;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@@ -42,9 +39,7 @@ public Map getAspects() {
public static class GenericEntityV3Builder {
public GenericEntityV3 build(
- ObjectMapper objectMapper,
- @Nonnull Urn urn,
-        Map<String, Pair<RecordTemplate, SystemMetadata>> aspects) {
+        ObjectMapper objectMapper, @Nonnull Urn urn, Map<String, AspectItem> aspects) {
      Map<String, Object> jsonObjectMap =
aspects.entrySet().stream()
.map(
@@ -53,13 +48,18 @@ public GenericEntityV3 build(
String aspectName = entry.getKey();
                      Map<String, Object> aspectValue =
objectMapper.readValue(
- RecordUtils.toJsonString(entry.getValue().getFirst())
+ RecordUtils.toJsonString(entry.getValue().getAspect())
.getBytes(StandardCharsets.UTF_8),
new TypeReference<>() {});
                      Map<String, Object> systemMetadata =
- entry.getValue().getSecond() != null
+ entry.getValue().getSystemMetadata() != null
? objectMapper.convertValue(
- entry.getValue().getSecond(), new TypeReference<>() {})
+ entry.getValue().getSystemMetadata(), new TypeReference<>() {})
+ : null;
+                      Map<String, Object> auditStamp =
+ entry.getValue().getAuditStamp() != null
+ ? objectMapper.convertValue(
+ entry.getValue().getAuditStamp().data(), new TypeReference<>() {})
: null;
return Map.entry(
@@ -67,6 +67,7 @@ public GenericEntityV3 build(
GenericAspectV3.builder()
.value(aspectValue)
.systemMetadata(systemMetadata)
+ .auditStamp(auditStamp)
.build());
} catch (IOException ex) {
throw new RuntimeException(ex);
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java
index f26ad6821c583..f6f248be77c67 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java
@@ -40,7 +40,7 @@ public class OpenAPIV3Generator {
private static final String NAME_QUERY = "query";
private static final String NAME_PATH = "path";
private static final String NAME_SYSTEM_METADATA = "systemMetadata";
- private static final String NAME_ASYNC = "async";
+ private static final String NAME_AUDIT_STAMP = "auditStamp";
private static final String NAME_VERSION = "version";
private static final String NAME_SCROLL_ID = "scrollId";
private static final String NAME_INCLUDE_SOFT_DELETE = "includeSoftDelete";
@@ -77,9 +77,6 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) {
// Components
final Components components = new Components();
// --> Aspect components
- // TODO: Correct handling of SystemMetadata and SortOrder
- components.addSchemas(
- "SystemMetadata", new Schema().type(TYPE_OBJECT).additionalProperties(true));
components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING")));
components.addSchemas("AspectPatch", buildAspectPatchSchema());
components.addSchemas(
@@ -167,6 +164,10 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) {
buildSingleEntityAspectPath(
e, a.getName(), a.getPegasusSchema().getName())));
});
+ // TODO: Correct handling of SystemMetadata and AuditStamp
+ components.addSchemas(
+ "SystemMetadata", new Schema().type(TYPE_OBJECT).additionalProperties(true));
+ components.addSchemas("AuditStamp", new Schema().type(TYPE_OBJECT).additionalProperties(true));
return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components);
}
@@ -185,7 +186,7 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) {
.schema(new Schema().type(TYPE_STRING)),
new Parameter()
.in(NAME_QUERY)
- .name("systemMetadata")
+ .name(NAME_SYSTEM_METADATA)
.description("Include systemMetadata with response.")
.schema(new Schema().type(TYPE_BOOLEAN)._default(false)),
new Parameter()
@@ -424,7 +425,7 @@ private static PathItem buildBatchGetEntityPath(final EntitySpec entity) {
List.of(
new Parameter()
.in(NAME_QUERY)
- .name("systemMetadata")
+ .name(NAME_SYSTEM_METADATA)
.description("Include systemMetadata with response.")
.schema(new Schema().type(TYPE_BOOLEAN)._default(false))))
.requestBody(
@@ -575,12 +576,19 @@ private static Schema buildAspectRefResponseSchema(final String aspectName) {
.required(List.of(PROPERTY_VALUE))
.addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName));
result.addProperty(
- "systemMetadata",
+ NAME_SYSTEM_METADATA,
new Schema<>()
.type(TYPE_OBJECT)
.anyOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "SystemMetadata")))
.description("System metadata for the aspect.")
.nullable(true));
+ result.addProperty(
+ NAME_AUDIT_STAMP,
+ new Schema<>()
+ .type(TYPE_OBJECT)
+ .anyOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "AuditStamp")))
+ .description("Audit stamp for the aspect.")
+ .nullable(true));
return result;
}
@@ -592,7 +600,7 @@ private static Schema buildAspectRefRequestSchema(final String aspectName) {
.required(List.of(PROPERTY_VALUE))
.addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName));
result.addProperty(
- "systemMetadata",
+ NAME_SYSTEM_METADATA,
new Schema<>()
.type(TYPE_OBJECT)
.anyOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "SystemMetadata")))
@@ -867,7 +875,7 @@ private static PathItem buildSingleEntityAspectPath(
List.of(
new Parameter()
.in(NAME_QUERY)
- .name("systemMetadata")
+ .name(NAME_SYSTEM_METADATA)
.description("Include systemMetadata with response.")
.schema(new Schema().type(TYPE_BOOLEAN)._default(false))))
.summary(String.format("Patch aspect %s on %s ", aspect, upperFirstEntity))
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
index 9ca34934e4c65..a0478c9af1609 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
@@ -13,7 +13,6 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.ByteString;
-import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
@@ -28,12 +27,12 @@
import com.linkedin.metadata.utils.AuditStampUtils;
import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.mxe.SystemMetadata;
-import com.linkedin.util.Pair;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.RequestContext;
import io.datahubproject.openapi.controller.GenericEntitiesController;
import io.datahubproject.openapi.exception.InvalidUrnException;
import io.datahubproject.openapi.exception.UnauthorizedException;
+import io.datahubproject.openapi.v3.models.AspectItem;
import io.datahubproject.openapi.v3.models.GenericAspectV3;
import io.datahubproject.openapi.v3.models.GenericEntityScrollResultV3;
import io.datahubproject.openapi.v3.models.GenericEntityV3;
@@ -143,11 +142,27 @@ protected List<GenericEntityV3> buildEntityVersionedAspectList(
.map(
u ->
GenericEntityV3.builder()
- .build(objectMapper, u, toAspectMap(u, aspects.get(u), withSystemMetadata)))
+ .build(
+ objectMapper, u, toAspectItemMap(u, aspects.get(u), withSystemMetadata)))
.collect(Collectors.toList());
}
}
+  private Map<String, AspectItem> toAspectItemMap(
+      Urn urn, List<EnvelopedAspect> aspects, boolean withSystemMetadata) {
+ return aspects.stream()
+ .map(
+ a ->
+ Map.entry(
+ a.getName(),
+ AspectItem.builder()
+ .aspect(toRecordTemplate(lookupAspectSpec(urn, a.getName()), a))
+ .systemMetadata(withSystemMetadata ? a.getSystemMetadata() : null)
+ .auditStamp(withSystemMetadata ? a.getCreated() : null)
+ .build()))
+ .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+ }
+
@Override
  protected List<GenericEntityV3> buildEntityList(
      Set<IngestResult> ingestResults, boolean withSystemMetadata) {
@@ -156,15 +171,21 @@ protected List buildEntityList(
    Map<Urn, List<IngestResult>> entityMap =
ingestResults.stream().collect(Collectors.groupingBy(IngestResult::getUrn));
    for (Map.Entry<Urn, List<IngestResult>> urnAspects : entityMap.entrySet()) {
-      Map<String, Pair<RecordTemplate, SystemMetadata>> aspectsMap =
+      Map<String, AspectItem> aspectsMap =
urnAspects.getValue().stream()
.map(
ingest ->
Map.entry(
ingest.getRequest().getAspectName(),
- Pair.of(
- ingest.getRequest().getRecordTemplate(),
- withSystemMetadata ? ingest.getRequest().getSystemMetadata() : null)))
+ AspectItem.builder()
+ .aspect(ingest.getRequest().getRecordTemplate())
+ .systemMetadata(
+ withSystemMetadata
+ ? ingest.getRequest().getSystemMetadata()
+ : null)
+ .auditStamp(
+ withSystemMetadata ? ingest.getRequest().getAuditStamp() : null)
+ .build()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
responseList.add(
GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap));
@@ -183,9 +204,12 @@ protected GenericEntityV3 buildGenericEntity(
updateAspectResult.getUrn(),
Map.of(
aspectName,
- Pair.of(
- updateAspectResult.getNewValue(),
- withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null)));
+ AspectItem.builder()
+ .aspect(updateAspectResult.getNewValue())
+ .systemMetadata(
+ withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null)
+ .auditStamp(withSystemMetadata ? updateAspectResult.getAuditStamp() : null)
+ .build()));
}
private List toRecordTemplates(
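
The net effect for API clients: with systemMetadata=true, each aspect in a V3 entity response now carries an auditStamp beside its value and systemMetadata. A hedged sketch of reading it; the host, token, and URN below are placeholders, not values from this patch.

    import requests
    from urllib.parse import quote

    urn = "urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)"  # placeholder
    resp = requests.get(
        f"http://localhost:8080/openapi/v3/entity/dataset/{quote(urn, safe='')}",
        params={"systemMetadata": "true"},  # auditStamp is populated alongside systemMetadata
        headers={"Authorization": "Bearer <token>"},
    )
    entity = resp.json()
    # Aspect entries are keyed by name; "urn" is a plain string entry.
    for name, aspect in entity.items():
        if isinstance(aspect, dict) and aspect.get("auditStamp"):
            print(name, aspect["auditStamp"])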
From a4a887c866c362ecdd9ccb9e4df2591a01b90a3f Mon Sep 17 00:00:00 2001
From: AndreasHegerNuritas
<163423418+AndreasHegerNuritas@users.noreply.github.com>
Date: Thu, 8 Aug 2024 16:38:16 +0100
Subject: [PATCH 10/72] fix(ingest/redshift): replace r'\n' with '\n' to avoid
 token error redshift serverless… (#11111)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../datahub/ingestion/source/redshift/redshift_schema.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py
index 6e88a50f898a5..2e628269edbc3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py
@@ -504,7 +504,11 @@ def get_alter_table_commands(
yield AlterTableRow(
transaction_id=row[field_names.index("transaction_id")],
session_id=session_id,
- query_text=row[field_names.index("query_text")],
+ # See https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERYTEXT.html
+ # for why we need to replace the \n with a newline.
+ query_text=row[field_names.index("query_text")].replace(
+ r"\n", "\n"
+ ),
start_time=row[field_names.index("start_time")],
)
rows = cursor.fetchmany()
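
The underlying issue: STL_QUERYTEXT stores newlines as the two-character sequence backslash plus n, so the raw value reaches the SQL tokenizer as one escaped blob. A standalone illustration of the replacement:

    raw = r"ALTER TABLE foo\nADD COLUMN bar INT"  # literal backslash-n, as stored in STL_QUERYTEXT
    fixed = raw.replace(r"\n", "\n")              # restore real line breaks
    assert "\\n" not in fixed and "\n" in fixed
    print(fixed)
    # ALTER TABLE foo
    # ADD COLUMN bar INT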
From 3d9a9541f1ff37ea80dd9d7d44fe501909269495 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Thu, 8 Aug 2024 13:54:14 -0500
Subject: [PATCH 11/72] fix(entity-client): handle null entityUrn case for
restli (#11122)
---
.../entity/client/RestliEntityClient.java | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
index 2a3ae5d006ae0..780c6c6a007c2 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
@@ -50,6 +50,7 @@
import com.linkedin.metadata.browse.BrowseResult;
import com.linkedin.metadata.browse.BrowseResultV2;
import com.linkedin.metadata.graph.LineageDirection;
+import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.query.AutoCompleteResult;
import com.linkedin.metadata.query.LineageFlags;
import com.linkedin.metadata.query.ListResult;
@@ -66,6 +67,7 @@
import com.linkedin.metadata.search.LineageSearchResult;
import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchResult;
+import com.linkedin.metadata.utils.EntityKeyUtils;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.mxe.MetadataChangeProposalArray;
import com.linkedin.mxe.PlatformEvent;
@@ -1063,7 +1065,20 @@ public List<String> batchIngestProposals(
String result =
sendClientRequest(requestBuilder, opContext.getSessionAuthentication()).getEntity();
return metadataChangeProposals.stream()
- .map(proposal -> "success".equals(result) ? proposal.getEntityUrn().toString() : null)
+ .map(
+ proposal -> {
+ if ("success".equals(result)) {
+ if (proposal.getEntityUrn() != null) {
+ return proposal.getEntityUrn().toString();
+ } else {
+ EntitySpec entitySpec =
+ opContext.getEntityRegistry().getEntitySpec(proposal.getEntityType());
+ return EntityKeyUtils.getUrnFromProposal(proposal, entitySpec.getKeyAspectSpec())
+ .toString();
+ }
+ }
+ return null;
+ })
.collect(Collectors.toList());
}
From 840b15083a17c5347c63b0e74b079e7b5ea70a1e Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Thu, 8 Aug 2024 14:05:55 -0700
Subject: [PATCH 12/72] fix(sql-parser): prevent bad urns from alter table
lineage (#11092)
---
.../goldens/v2_sqlite_operator.json | 112 +++++++++---------
.../v2_sqlite_operator_no_dag_listener.json | 64 +++++-----
.../datahub/sql_parsing/sqlglot_lineage.py | 62 ++++++----
.../testing/check_sql_parser_result.py | 1 -
.../test_bigquery_alter_table_column.json | 14 +++
.../goldens/test_snowflake_drop_schema.json | 12 ++
.../goldens/test_sqlite_drop_table.json | 14 +++
.../goldens/test_sqlite_drop_view.json | 14 +++
.../unit/sql_parsing/test_sqlglot_lineage.py | 51 ++++++++
9 files changed, 231 insertions(+), 113 deletions(-)
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
index e7902d165051b..4bc34b7b0d3ce 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
@@ -350,8 +350,8 @@
"json": {
"timestampMillis": 1717179743558,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -367,8 +367,8 @@
"json": {
"timestampMillis": 1717179743932,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:airflow",
"operationType": "CREATE",
@@ -552,8 +552,8 @@
"json": {
"timestampMillis": 1717179743960,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -742,8 +742,8 @@
"json": {
"timestampMillis": 1717179748679,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -759,8 +759,8 @@
"json": {
"timestampMillis": 1717179749258,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:airflow",
"operationType": "CREATE",
@@ -875,8 +875,8 @@
"json": {
"timestampMillis": 1717179749324,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1161,8 +1161,8 @@
"json": {
"timestampMillis": 1717179757397,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -1178,8 +1178,8 @@
"json": {
"timestampMillis": 1717179758424,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:airflow",
"operationType": "CREATE",
@@ -1420,8 +1420,8 @@
"json": {
"timestampMillis": 1717179758496,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1483,10 +1483,10 @@
"aspectName": "dataJobInputOutput",
"aspect": {
"json": {
- "inputDatasets": [
+ "inputDatasets": [],
+ "outputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
],
- "outputDatasets": [],
"inputDatajobs": [
"urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
],
@@ -1555,6 +1555,19 @@
}
}
},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceOutput",
+ "aspect": {
+ "json": {
+ "outputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+ ]
+ }
+ }
+},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
@@ -1640,19 +1653,6 @@
}
}
},
-{
- "entityType": "dataProcessInstance",
- "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
- "changeType": "UPSERT",
- "aspectName": "dataProcessInstanceInput",
- "aspect": {
- "json": {
- "inputs": [
- "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
- ]
- }
- }
-},
{
"entityType": "dataProcessInstance",
"entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
@@ -1662,8 +1662,8 @@
"json": {
"timestampMillis": 1718733767964,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -1679,8 +1679,8 @@
"json": {
"timestampMillis": 1718733768638,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1697,10 +1697,10 @@
"aspectName": "dataJobInputOutput",
"aspect": {
"json": {
- "inputDatasets": [
+ "inputDatasets": [],
+ "outputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
],
- "outputDatasets": [],
"inputDatajobs": [
"urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
],
@@ -1809,19 +1809,6 @@
}
}
},
-{
- "entityType": "dataProcessInstance",
- "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
- "changeType": "UPSERT",
- "aspectName": "dataProcessInstanceInput",
- "aspect": {
- "json": {
- "inputs": [
- "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
- ]
- }
- }
-},
{
"entityType": "dataProcessInstance",
"entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
@@ -1843,8 +1830,8 @@
"json": {
"timestampMillis": 1718733773354,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -1860,8 +1847,8 @@
"json": {
"timestampMillis": 1718733774147,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1870,5 +1857,18 @@
}
}
}
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceOutput",
+ "aspect": {
+ "json": {
+ "outputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+ ]
+ }
+ }
}
]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
index a9af068e2e4e9..99bda0e0f2569 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
@@ -336,8 +336,8 @@
"json": {
"timestampMillis": 1717180072004,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -382,8 +382,8 @@
"json": {
"timestampMillis": 1719864194882,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:airflow",
"operationType": "CREATE",
@@ -435,8 +435,8 @@
"json": {
"timestampMillis": 1717180072275,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -641,8 +641,8 @@
"json": {
"timestampMillis": 1717180078196,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -722,8 +722,8 @@
"json": {
"timestampMillis": 1717180078619,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1000,8 +1000,8 @@
"json": {
"timestampMillis": 1717180084642,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -1081,8 +1081,8 @@
"json": {
"timestampMillis": 1717180085266,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1186,10 +1186,10 @@
"aspectName": "dataJobInputOutput",
"aspect": {
"json": {
- "inputDatasets": [
+ "inputDatasets": [],
+ "outputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
],
- "outputDatasets": [],
"inputDatajobs": [
"urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
],
@@ -1287,8 +1287,8 @@
"json": {
"timestampMillis": 1717180091148,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -1368,8 +1368,8 @@
"json": {
"timestampMillis": 1717180091923,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1499,10 +1499,10 @@
"aspectName": "dataJobInputOutput",
"aspect": {
"json": {
- "inputDatasets": [
+ "inputDatasets": [],
+ "outputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
],
- "outputDatasets": [],
"inputDatajobs": [
"urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
],
@@ -1613,8 +1613,8 @@
"json": {
"timestampMillis": 1717180096108,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED",
"attempt": 1
@@ -1630,8 +1630,8 @@
"json": {
"timestampMillis": 1719864203487,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"actor": "urn:li:corpuser:airflow",
"operationType": "CREATE",
@@ -1712,8 +1712,8 @@
"json": {
"timestampMillis": 1717180096993,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -1727,10 +1727,10 @@
"entityType": "dataProcessInstance",
"entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
"changeType": "UPSERT",
- "aspectName": "dataProcessInstanceInput",
+ "aspectName": "dataProcessInstanceOutput",
"aspect": {
"json": {
- "inputs": [
+ "outputs": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
]
}
@@ -1740,10 +1740,10 @@
"entityType": "dataProcessInstance",
"entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
"changeType": "UPSERT",
- "aspectName": "dataProcessInstanceInput",
+ "aspectName": "dataProcessInstanceOutput",
"aspect": {
"json": {
- "inputs": [
+ "outputs": [
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
]
}
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
index 976ff8bcc9b3f..0146343002171 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
@@ -189,35 +189,49 @@ def _table_level_lineage(
statement: sqlglot.Expression, dialect: sqlglot.Dialect
) -> Tuple[Set[_TableName], Set[_TableName]]:
# Generate table-level lineage.
- modified = {
- _TableName.from_sqlglot_table(expr.this)
- for expr in statement.find_all(
- sqlglot.exp.Create,
- sqlglot.exp.Insert,
- sqlglot.exp.Update,
- sqlglot.exp.Delete,
- sqlglot.exp.Merge,
- )
- # In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)",
- # the `this` on the INSERT part isn't a table.
- if isinstance(expr.this, sqlglot.exp.Table)
- } | {
- # For statements that include a column list, like
- # CREATE DDL statements and `INSERT INTO table (col1, col2) SELECT ...`
- # the table name is nested inside a Schema object.
- _TableName.from_sqlglot_table(expr.this.this)
- for expr in statement.find_all(
- sqlglot.exp.Create,
- sqlglot.exp.Insert,
- )
- if isinstance(expr.this, sqlglot.exp.Schema)
- and isinstance(expr.this.this, sqlglot.exp.Table)
- }
+ modified = (
+ {
+ _TableName.from_sqlglot_table(expr.this)
+ for expr in statement.find_all(
+ sqlglot.exp.Create,
+ sqlglot.exp.Insert,
+ sqlglot.exp.Update,
+ sqlglot.exp.Delete,
+ sqlglot.exp.Merge,
+ sqlglot.exp.AlterTable,
+ )
+ # In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)",
+ # the `this` on the INSERT part isn't a table.
+ if isinstance(expr.this, sqlglot.exp.Table)
+ }
+ | {
+ # For statements that include a column list, like
+ # CREATE DDL statements and `INSERT INTO table (col1, col2) SELECT ...`
+ # the table name is nested inside a Schema object.
+ _TableName.from_sqlglot_table(expr.this.this)
+ for expr in statement.find_all(
+ sqlglot.exp.Create,
+ sqlglot.exp.Insert,
+ )
+ if isinstance(expr.this, sqlglot.exp.Schema)
+ and isinstance(expr.this.this, sqlglot.exp.Table)
+ }
+ | {
+ # For drop statements, we only want it if a table/view is being dropped.
+ # Other "kinds" will not have table.name populated.
+ _TableName.from_sqlglot_table(expr.this)
+ for expr in ([statement] if isinstance(statement, sqlglot.exp.Drop) else [])
+ if isinstance(expr.this, sqlglot.exp.Table)
+ and expr.this.this
+ and expr.this.name
+ }
+ )
tables = (
{
_TableName.from_sqlglot_table(table)
for table in statement.find_all(sqlglot.exp.Table)
+ if not isinstance(table.parent, sqlglot.exp.Drop)
}
# ignore references created in this query
- modified
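
A quick check of the two new branches, assuming the sqlglot version pinned by this repo (where ALTER TABLE parses to exp.AlterTable and DROP to exp.Drop, as the hunk itself relies on):

    import sqlglot
    from sqlglot import expressions as exp

    stmt = sqlglot.parse_one("DROP TABLE my_schema.my_table", read="sqlite")
    assert isinstance(stmt, exp.Drop)
    # A named Table lands in `modified` (out_tables); DROP SCHEMA carries no
    # table name, so it is skipped, matching the golden files below.
    assert isinstance(stmt.this, exp.Table) and stmt.this.name == "my_table"

    stmt = sqlglot.parse_one(
        "ALTER TABLE `my-bq-project.covid_data.covid_deaths` DROP COLUMN patient_name",
        read="bigquery",
    )
    assert isinstance(stmt, exp.AlterTable) and isinstance(stmt.this, exp.Table)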
diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py
index 39c0dddd31400..72b5f6c5e26e4 100644
--- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py
+++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py
@@ -15,7 +15,6 @@
logger = logging.getLogger(__name__)
-# TODO: Hook this into the standard --update-golden-files mechanism.
UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json
new file mode 100644
index 0000000000000..3c6c9737e8e19
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_alter_table_column.json
@@ -0,0 +1,14 @@
+{
+ "query_type": "UNKNOWN",
+ "query_type_props": {},
+ "query_fingerprint": "7d04253c3add0194c557942ef9b7485f38e68762d300dad364b9cec8656035b3",
+ "in_tables": [],
+ "out_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-bq-project.covid_data.covid_deaths,PROD)"
+ ],
+ "column_lineage": null,
+ "debug_info": {
+ "confidence": 0.2,
+ "generalized_statement": "ALTER TABLE `my-bq-project.covid_data.covid_deaths` DROP COLUMN patient_name"
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json
new file mode 100644
index 0000000000000..2784b8e9543b2
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_drop_schema.json
@@ -0,0 +1,12 @@
+{
+ "query_type": "UNKNOWN",
+ "query_type_props": {},
+ "query_fingerprint": "4eefab57619a812a94030acce0071857561265945e79d798563adb53bd0b9646",
+ "in_tables": [],
+ "out_tables": [],
+ "column_lineage": null,
+ "debug_info": {
+ "confidence": 0.9,
+ "generalized_statement": "DROP SCHEMA my_schema"
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json
new file mode 100644
index 0000000000000..ae8b3f99897dc
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_table.json
@@ -0,0 +1,14 @@
+{
+ "query_type": "UNKNOWN",
+ "query_type_props": {},
+ "query_fingerprint": "d1c29ad73325b08bb66e62ec00ba1d5be4412ec72b4bbc9c094f1272b9da4f86",
+ "in_tables": [],
+ "out_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:sqlite,my_schema.my_table,PROD)"
+ ],
+ "column_lineage": null,
+ "debug_info": {
+ "confidence": 0.2,
+ "generalized_statement": "DROP TABLE my_schema.my_table"
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json
new file mode 100644
index 0000000000000..6650ef396a570
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_sqlite_drop_view.json
@@ -0,0 +1,14 @@
+{
+ "query_type": "UNKNOWN",
+ "query_type_props": {},
+ "query_fingerprint": "35a3c60e7ed98884dde3f1f5fe9079f844832430589a3326b97d617b8303f191",
+ "in_tables": [],
+ "out_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:sqlite,my_schema.my_view,PROD)"
+ ],
+ "column_lineage": null,
+ "debug_info": {
+ "confidence": 0.2,
+ "generalized_statement": "DROP VIEW my_schema.my_view"
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
index e5b669329f16c..3096c9b8269a1 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
@@ -2,11 +2,22 @@
import pytest
+import datahub.testing.check_sql_parser_result as checker
from datahub.testing.check_sql_parser_result import assert_sql_result
RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens"
+@pytest.fixture(autouse=True)
+def set_update_sql_parser(
+ pytestconfig: pytest.Config, monkeypatch: pytest.MonkeyPatch
+) -> None:
+ update_golden = pytestconfig.getoption("--update-golden-files")
+
+ if update_golden:
+ monkeypatch.setattr(checker, "UPDATE_FILES", True)
+
+
def test_invalid_sql():
assert_sql_result(
"""
@@ -1202,3 +1213,43 @@ def test_bigquery_information_schema_query() -> None:
dialect="bigquery",
expected_file=RESOURCE_DIR / "test_bigquery_information_schema_query.json",
)
+
+
+def test_bigquery_alter_table_column() -> None:
+ assert_sql_result(
+ """\
+ALTER TABLE `my-bq-project.covid_data.covid_deaths` drop COLUMN patient_name
+ """,
+ dialect="bigquery",
+ expected_file=RESOURCE_DIR / "test_bigquery_alter_table_column.json",
+ )
+
+
+def test_sqlite_drop_table() -> None:
+ assert_sql_result(
+ """\
+DROP TABLE my_schema.my_table
+""",
+ dialect="sqlite",
+ expected_file=RESOURCE_DIR / "test_sqlite_drop_table.json",
+ )
+
+
+def test_sqlite_drop_view() -> None:
+ assert_sql_result(
+ """\
+DROP VIEW my_schema.my_view
+""",
+ dialect="sqlite",
+ expected_file=RESOURCE_DIR / "test_sqlite_drop_view.json",
+ )
+
+
+def test_snowflake_drop_schema() -> None:
+ assert_sql_result(
+ """\
+DROP SCHEMA my_schema
+""",
+ dialect="snowflake",
+ expected_file=RESOURCE_DIR / "test_snowflake_drop_schema.json",
+ )
From 78336c9f58fb89a25f4228b2e8b5c0322d66807f Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Fri, 9 Aug 2024 09:18:51 +0530
Subject: [PATCH 13/72] fix(ingest/bigquery): use small batch size if
use_tables_list_query_v2 is set (#11121)
---
.../datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
index 46ec75edb9734..c6a50a1c977f4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
@@ -985,7 +985,7 @@ def get_tables_for_dataset(
# https://cloud.google.com/bigquery/docs/information-schema-partitions
max_batch_size: int = (
self.config.number_of_datasets_process_in_batch
- if not self.config.is_profiling_enabled()
+ if not self.config.have_table_data_read_permission
else self.config.number_of_datasets_process_in_batch_if_profiling_enabled
)
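
For reference, the gate is now table-data read access rather than profiling alone; presumably have_table_data_read_permission is true when either profiling or use_tables_list_query_v2 will read table data. A sketch of the resulting selection, with config names as in the hunk:

    def choose_max_batch_size(config) -> int:
        # Use smaller batches whenever table data will be read: the
        # INFORMATION_SCHEMA.PARTITIONS query grows with every dataset in a batch.
        if config.have_table_data_read_permission:
            return config.number_of_datasets_process_in_batch_if_profiling_enabled
        return config.number_of_datasets_process_in_batch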
From aa07e2a9371e8a5c90c87217dfc136dda38f60f2 Mon Sep 17 00:00:00 2001
From: Felix Lüdin
<13187726+Masterchen09@users.noreply.github.com>
Date: Fri, 9 Aug 2024 17:59:38 +0200
Subject: [PATCH 14/72] fix(graphql): add missing entities to EntityTypeMapper
and EntityTypeUrnMapper (#10366)
---
.../types/entitytype/EntityTypeMapper.java | 33 ++++++++------
.../types/entitytype/EntityTypeUrnMapper.java | 43 ++++++++++++++++---
2 files changed, 57 insertions(+), 19 deletions(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java
index 26835f9e57dcd..77457a814bd67 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java
@@ -15,40 +15,49 @@ public class EntityTypeMapper {
  static final Map<EntityType, String> ENTITY_TYPE_TO_NAME =
      ImmutableMap.<EntityType, String>builder()
+ .put(EntityType.DOMAIN, Constants.DOMAIN_ENTITY_NAME)
.put(EntityType.DATASET, Constants.DATASET_ENTITY_NAME)
- .put(EntityType.ROLE, Constants.ROLE_ENTITY_NAME)
.put(EntityType.CORP_USER, Constants.CORP_USER_ENTITY_NAME)
.put(EntityType.CORP_GROUP, Constants.CORP_GROUP_ENTITY_NAME)
.put(EntityType.DATA_PLATFORM, Constants.DATA_PLATFORM_ENTITY_NAME)
+ .put(EntityType.ER_MODEL_RELATIONSHIP, Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME)
.put(EntityType.DASHBOARD, Constants.DASHBOARD_ENTITY_NAME)
+ .put(EntityType.NOTEBOOK, Constants.NOTEBOOK_ENTITY_NAME)
.put(EntityType.CHART, Constants.CHART_ENTITY_NAME)
- .put(EntityType.TAG, Constants.TAG_ENTITY_NAME)
.put(EntityType.DATA_FLOW, Constants.DATA_FLOW_ENTITY_NAME)
.put(EntityType.DATA_JOB, Constants.DATA_JOB_ENTITY_NAME)
- .put(EntityType.DATA_PROCESS_INSTANCE, Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME)
+ .put(EntityType.TAG, Constants.TAG_ENTITY_NAME)
.put(EntityType.GLOSSARY_TERM, Constants.GLOSSARY_TERM_ENTITY_NAME)
.put(EntityType.GLOSSARY_NODE, Constants.GLOSSARY_NODE_ENTITY_NAME)
+ .put(EntityType.CONTAINER, Constants.CONTAINER_ENTITY_NAME)
.put(EntityType.MLMODEL, Constants.ML_MODEL_ENTITY_NAME)
.put(EntityType.MLMODEL_GROUP, Constants.ML_MODEL_GROUP_ENTITY_NAME)
.put(EntityType.MLFEATURE_TABLE, Constants.ML_FEATURE_TABLE_ENTITY_NAME)
.put(EntityType.MLFEATURE, Constants.ML_FEATURE_ENTITY_NAME)
.put(EntityType.MLPRIMARY_KEY, Constants.ML_PRIMARY_KEY_ENTITY_NAME)
- .put(EntityType.CONTAINER, Constants.CONTAINER_ENTITY_NAME)
- .put(EntityType.DOMAIN, Constants.DOMAIN_ENTITY_NAME)
- .put(EntityType.NOTEBOOK, Constants.NOTEBOOK_ENTITY_NAME)
+ .put(EntityType.INGESTION_SOURCE, Constants.INGESTION_SOURCE_ENTITY_NAME)
+ .put(EntityType.EXECUTION_REQUEST, Constants.EXECUTION_REQUEST_ENTITY_NAME)
+ .put(EntityType.ASSERTION, Constants.ASSERTION_ENTITY_NAME)
+ .put(EntityType.DATA_PROCESS_INSTANCE, Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME)
.put(EntityType.DATA_PLATFORM_INSTANCE, Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME)
+ .put(EntityType.ACCESS_TOKEN, Constants.ACCESS_TOKEN_ENTITY_NAME)
.put(EntityType.TEST, Constants.TEST_ENTITY_NAME)
- .put(EntityType.ER_MODEL_RELATIONSHIP, Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME)
+ .put(EntityType.DATAHUB_POLICY, Constants.POLICY_ENTITY_NAME)
+ .put(EntityType.DATAHUB_ROLE, Constants.DATAHUB_ROLE_ENTITY_NAME)
+ .put(EntityType.POST, Constants.POST_ENTITY_NAME)
+ .put(EntityType.SCHEMA_FIELD, Constants.SCHEMA_FIELD_ENTITY_NAME)
.put(EntityType.DATAHUB_VIEW, Constants.DATAHUB_VIEW_ENTITY_NAME)
+ .put(EntityType.QUERY, Constants.QUERY_ENTITY_NAME)
.put(EntityType.DATA_PRODUCT, Constants.DATA_PRODUCT_ENTITY_NAME)
- .put(EntityType.SCHEMA_FIELD, Constants.SCHEMA_FIELD_ENTITY_NAME)
+ .put(EntityType.CUSTOM_OWNERSHIP_TYPE, Constants.OWNERSHIP_TYPE_ENTITY_NAME)
+ .put(EntityType.INCIDENT, Constants.INCIDENT_ENTITY_NAME)
+ .put(EntityType.ROLE, Constants.ROLE_ENTITY_NAME)
.put(EntityType.STRUCTURED_PROPERTY, Constants.STRUCTURED_PROPERTY_ENTITY_NAME)
- .put(EntityType.ASSERTION, Constants.ASSERTION_ENTITY_NAME)
+ .put(EntityType.FORM, Constants.FORM_ENTITY_NAME)
+ .put(EntityType.DATA_TYPE, Constants.DATA_TYPE_ENTITY_NAME)
+ .put(EntityType.ENTITY_TYPE, Constants.ENTITY_TYPE_ENTITY_NAME)
.put(EntityType.RESTRICTED, Constants.RESTRICTED_ENTITY_NAME)
.put(EntityType.BUSINESS_ATTRIBUTE, Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME)
- .put(EntityType.QUERY, Constants.QUERY_ENTITY_NAME)
- .put(EntityType.POST, Constants.POST_ENTITY_NAME)
- .put(EntityType.FORM, Constants.FORM_ENTITY_NAME)
.build();
  private static final Map<String, EntityType> ENTITY_NAME_TO_TYPE =
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java
index 9e9bf86e5fe7f..334faf753cb8b 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java
@@ -20,34 +20,63 @@ public class EntityTypeUrnMapper {
  static final Map<String, String> ENTITY_NAME_TO_ENTITY_TYPE_URN =
      ImmutableMap.<String, String>builder()
+ .put(Constants.DOMAIN_ENTITY_NAME, "urn:li:entityType:datahub.domain")
.put(Constants.DATASET_ENTITY_NAME, "urn:li:entityType:datahub.dataset")
- .put(Constants.ROLE_ENTITY_NAME, "urn:li:entityType:datahub.role")
.put(Constants.CORP_USER_ENTITY_NAME, "urn:li:entityType:datahub.corpuser")
.put(Constants.CORP_GROUP_ENTITY_NAME, "urn:li:entityType:datahub.corpGroup")
.put(Constants.DATA_PLATFORM_ENTITY_NAME, "urn:li:entityType:datahub.dataPlatform")
+ .put(
+ Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME,
+ "urn:li:entityType:datahub.erModelRelationship")
.put(Constants.DASHBOARD_ENTITY_NAME, "urn:li:entityType:datahub.dashboard")
+ .put(Constants.NOTEBOOK_ENTITY_NAME, "urn:li:entityType:datahub.notebook")
.put(Constants.CHART_ENTITY_NAME, "urn:li:entityType:datahub.chart")
- .put(Constants.TAG_ENTITY_NAME, "urn:li:entityType:datahub.tag")
.put(Constants.DATA_FLOW_ENTITY_NAME, "urn:li:entityType:datahub.dataFlow")
.put(Constants.DATA_JOB_ENTITY_NAME, "urn:li:entityType:datahub.dataJob")
+ .put(Constants.TAG_ENTITY_NAME, "urn:li:entityType:datahub.tag")
.put(Constants.GLOSSARY_TERM_ENTITY_NAME, "urn:li:entityType:datahub.glossaryTerm")
.put(Constants.GLOSSARY_NODE_ENTITY_NAME, "urn:li:entityType:datahub.glossaryNode")
+ .put(Constants.CONTAINER_ENTITY_NAME, "urn:li:entityType:datahub.container")
.put(Constants.ML_MODEL_ENTITY_NAME, "urn:li:entityType:datahub.mlModel")
.put(Constants.ML_MODEL_GROUP_ENTITY_NAME, "urn:li:entityType:datahub.mlModelGroup")
.put(Constants.ML_FEATURE_TABLE_ENTITY_NAME, "urn:li:entityType:datahub.mlFeatureTable")
.put(Constants.ML_FEATURE_ENTITY_NAME, "urn:li:entityType:datahub.mlFeature")
.put(Constants.ML_PRIMARY_KEY_ENTITY_NAME, "urn:li:entityType:datahub.mlPrimaryKey")
- .put(Constants.CONTAINER_ENTITY_NAME, "urn:li:entityType:datahub.container")
- .put(Constants.DOMAIN_ENTITY_NAME, "urn:li:entityType:datahub.domain")
- .put(Constants.NOTEBOOK_ENTITY_NAME, "urn:li:entityType:datahub.notebook")
+ .put(
+ Constants.INGESTION_SOURCE_ENTITY_NAME,
+ "urn:li:entityType:datahub.dataHubIngestionSource")
+ .put(
+ Constants.EXECUTION_REQUEST_ENTITY_NAME,
+ "urn:li:entityType:datahub.dataHubExecutionRequest")
+ .put(Constants.ASSERTION_ENTITY_NAME, "urn:li:entityType:datahub.assertion")
+ .put(
+ Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME,
+ "urn:li:entityType:datahub.dataProcessInstance")
.put(
Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME,
"urn:li:entityType:datahub.dataPlatformInstance")
+ .put(Constants.ACCESS_TOKEN_ENTITY_NAME, "urn:li:entityType:datahub.dataHubAccessToken")
.put(Constants.TEST_ENTITY_NAME, "urn:li:entityType:datahub.test")
+ .put(Constants.POLICY_ENTITY_NAME, "urn:li:entityType:datahub.dataHubPolicy")
+ .put(Constants.DATAHUB_ROLE_ENTITY_NAME, "urn:li:entityType:datahub.dataHubRole")
+ .put(Constants.POST_ENTITY_NAME, "urn:li:entityType:datahub.post")
+ .put(Constants.SCHEMA_FIELD_ENTITY_NAME, "urn:li:entityType:datahub.schemaField")
.put(Constants.DATAHUB_VIEW_ENTITY_NAME, "urn:li:entityType:datahub.dataHubView")
+ .put(Constants.QUERY_ENTITY_NAME, "urn:li:entityType:datahub.query")
.put(Constants.DATA_PRODUCT_ENTITY_NAME, "urn:li:entityType:datahub.dataProduct")
- .put(Constants.ASSERTION_ENTITY_NAME, "urn:li:entityType:datahub.assertion")
- .put(Constants.SCHEMA_FIELD_ENTITY_NAME, "urn:li:entityType:datahub.schemaField")
+ .put(Constants.OWNERSHIP_TYPE_ENTITY_NAME, "urn:li:entityType:datahub.ownershipType")
+ .put(Constants.INCIDENT_ENTITY_NAME, "urn:li:entityType:datahub.incident")
+ .put(Constants.ROLE_ENTITY_NAME, "urn:li:entityType:datahub.role")
+ .put(
+ Constants.STRUCTURED_PROPERTY_ENTITY_NAME,
+ "urn:li:entityType:datahub.structuredProperty")
+ .put(Constants.FORM_ENTITY_NAME, "urn:li:entityType:datahub.form")
+ .put(Constants.DATA_TYPE_ENTITY_NAME, "urn:li:entityType:datahub.dataType")
+ .put(Constants.ENTITY_TYPE_ENTITY_NAME, "urn:li:entityType:datahub.entityType")
+ .put(Constants.RESTRICTED_ENTITY_NAME, "urn:li:entityType:datahub.restricted")
+ .put(
+ Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME,
+ "urn:li:entityType:datahub.businessAttribute")
.build();
  private static final Map<String, String> ENTITY_TYPE_URN_TO_NAME =
From 3a38415d6b1497f439cb8fffa2b69032e35cf04a Mon Sep 17 00:00:00 2001
From: jayasimhankv <145704974+jayasimhankv@users.noreply.github.com>
Date: Fri, 9 Aug 2024 11:02:17 -0500
Subject: [PATCH 15/72] feat(ui): Changes to allow editable dataset name
(#10608)
Co-authored-by: Jay Kadambi
---
.../graphql/featureflags/FeatureFlags.java | 1 +
.../resolvers/config/AppConfigResolver.java | 1 +
.../resolvers/mutate/UpdateNameResolver.java | 36 +++++++++++++++++++
.../types/dataset/mappers/DatasetMapper.java | 4 +++
.../mappers/DatasetUpdateInputMapper.java | 9 +++--
.../src/main/resources/app.graphql | 5 +++
.../src/main/resources/entity.graphql | 9 +++++
.../src/app/entity/dataset/DatasetEntity.tsx | 7 ++--
.../profile/header/EntityHeader.tsx | 10 ++++--
datahub-web-react/src/app/useAppConfig.ts | 5 +++
datahub-web-react/src/appConfigContext.tsx | 1 +
datahub-web-react/src/graphql/app.graphql | 1 +
datahub-web-react/src/graphql/browse.graphql | 1 +
.../src/graphql/fragments.graphql | 1 +
datahub-web-react/src/graphql/preview.graphql | 1 +
datahub-web-react/src/graphql/search.graphql | 1 +
.../dataset/EditableDatasetProperties.pdl | 9 +++++
...com.linkedin.entity.entities.snapshot.json | 9 +++++
...m.linkedin.platform.platform.snapshot.json | 9 +++++
19 files changed, 113 insertions(+), 7 deletions(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
index 85a2c09ed79a7..167515a13c4da 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
@@ -21,5 +21,6 @@ public class FeatureFlags {
private boolean schemaFieldEntityFetchEnabled = false;
private boolean businessAttributeEntityEnabled = false;
private boolean dataContractsEnabled = false;
+ private boolean editableDatasetNameEnabled = false;
private boolean showSeparateSiblings = false;
}
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
index fb1672d54dc97..259d05c631557 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
@@ -186,6 +186,7 @@ public CompletableFuture<AppConfig> get(final DataFetchingEnvironment environment)
.setNestedDomainsEnabled(_featureFlags.isNestedDomainsEnabled())
.setPlatformBrowseV2(_featureFlags.isPlatformBrowseV2())
.setDataContractsEnabled(_featureFlags.isDataContractsEnabled())
+ .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled())
.setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings())
.build();
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java
index 1d90720fc6902..ad6dbbe635ed1 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java
@@ -4,9 +4,11 @@
import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.persistAspect;
import com.linkedin.businessattribute.BusinessAttributeInfo;
+import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.CorpuserUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.data.template.SetMode;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.authorization.AuthorizationUtils;
import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
@@ -20,6 +22,7 @@
import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils;
import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils;
import com.linkedin.dataproduct.DataProductProperties;
+import com.linkedin.dataset.EditableDatasetProperties;
import com.linkedin.domain.DomainProperties;
import com.linkedin.domain.Domains;
import com.linkedin.entity.client.EntityClient;
@@ -70,6 +73,8 @@ public CompletableFuture<Boolean> get(DataFetchingEnvironment environment) throws Exception {
return updateDataProductName(targetUrn, input, context);
case Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME:
return updateBusinessAttributeName(targetUrn, input, environment.getContext());
+ case Constants.DATASET_ENTITY_NAME:
+ return updateDatasetName(targetUrn, input, environment.getContext());
default:
throw new RuntimeException(
String.format(
@@ -236,6 +241,37 @@ private Boolean updateGroupName(Urn targetUrn, UpdateNameInput input, QueryConte
"Unauthorized to perform this action. Please contact your DataHub administrator.");
}
+  // Updates the editable dataset properties aspect's name field
+ private Boolean updateDatasetName(Urn targetUrn, UpdateNameInput input, QueryContext context) {
+ if (AuthorizationUtils.canEditProperties(targetUrn, context)) {
+ try {
+ if (input.getName() != null) {
+ final EditableDatasetProperties editableDatasetProperties =
+ new EditableDatasetProperties();
+ editableDatasetProperties.setName(input.getName());
+ final AuditStamp auditStamp = new AuditStamp();
+ Urn actor = UrnUtils.getUrn(context.getActorUrn());
+ auditStamp.setActor(actor, SetMode.IGNORE_NULL);
+ auditStamp.setTime(System.currentTimeMillis());
+ editableDatasetProperties.setLastModified(auditStamp);
+ persistAspect(
+ context.getOperationContext(),
+ targetUrn,
+ Constants.EDITABLE_DATASET_PROPERTIES_ASPECT_NAME,
+ editableDatasetProperties,
+ actor,
+ _entityService);
+ }
+ return true;
+ } catch (Exception e) {
+ throw new RuntimeException(
+ String.format("Failed to perform update against input %s", input), e);
+ }
+ }
+ throw new AuthorizationException(
+ "Unauthorized to perform this action. Please contact your DataHub administrator.");
+ }
+
private Boolean updateDataProductName(
Urn targetUrn, UpdateNameInput input, QueryContext context) {
try {
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java
index 89d5aa8621bf0..a7b5f6de0c183 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java
@@ -222,6 +222,7 @@ private void mapDatasetProperties(
properties.setQualifiedName(gmsProperties.getQualifiedName());
dataset.setProperties(properties);
dataset.setDescription(properties.getDescription());
+ dataset.setName(properties.getName());
if (gmsProperties.getUri() != null) {
dataset.setUri(gmsProperties.getUri().toString());
}
@@ -248,6 +249,9 @@ private void mapEditableDatasetProperties(@Nonnull Dataset dataset, @Nonnull Dat
new EditableDatasetProperties(dataMap);
final DatasetEditableProperties editableProperties = new DatasetEditableProperties();
editableProperties.setDescription(editableDatasetProperties.getDescription());
+ if (editableDatasetProperties.getName() != null) {
+ editableProperties.setName(editableDatasetProperties.getName());
+ }
dataset.setEditableProperties(editableProperties);
}
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java
index 122298bcab654..104dc0e104341 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetUpdateInputMapper.java
@@ -111,8 +111,13 @@ public Collection<MetadataChangeProposal> apply(
if (datasetUpdateInput.getEditableProperties() != null) {
final EditableDatasetProperties editableDatasetProperties = new EditableDatasetProperties();
- editableDatasetProperties.setDescription(
- datasetUpdateInput.getEditableProperties().getDescription());
+ if (datasetUpdateInput.getEditableProperties().getDescription() != null) {
+ editableDatasetProperties.setDescription(
+ datasetUpdateInput.getEditableProperties().getDescription());
+ }
+ if (datasetUpdateInput.getEditableProperties().getName() != null) {
+ editableDatasetProperties.setName(datasetUpdateInput.getEditableProperties().getName());
+ }
editableDatasetProperties.setLastModified(auditStamp);
editableDatasetProperties.setCreated(auditStamp);
proposals.add(
diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql
index 024a7a989f9db..262d2384d84ad 100644
--- a/datahub-graphql-core/src/main/resources/app.graphql
+++ b/datahub-graphql-core/src/main/resources/app.graphql
@@ -508,6 +508,11 @@ type FeatureFlagsConfig {
"""
dataContractsEnabled: Boolean!
+ """
+ Whether dataset names are editable
+ """
+ editableDatasetNameEnabled: Boolean!
+
"""
If turned on, all siblings will be separated with no way to get to a "combined" sibling view
"""
diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index 941a6a28ceb2c..609597beee51b 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -3482,6 +3482,11 @@ type DatasetEditableProperties {
Description of the Dataset
"""
description: String
+
+ """
+ Editable name of the Dataset
+ """
+ name: String
}
"""
@@ -4850,6 +4855,10 @@ input DatasetEditablePropertiesUpdate {
Writable description aka documentation for a Dataset
"""
description: String!
+ """
+ Editable name of the Dataset
+ """
+ name: String
}
"""
diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
index c30fee7abc0b6..21ae085832cb3 100644
--- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
+++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
@@ -220,6 +220,7 @@ export class DatasetEntity implements Entity<Dataset> {
},
]}
sidebarSections={this.getSidebarSections()}
+ isNameEditable
/>
);
@@ -283,7 +284,7 @@ export class DatasetEntity implements Entity<Dataset> {
return (
{
return (
{
};
displayName = (data: Dataset) => {
- return data?.properties?.name || data.name || data.urn;
+ return data?.editableProperties?.name || data?.properties?.name || data.name || data.urn;
};
platformLogoUrl = (data: Dataset) => {
diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx
index 09fa23dbc9f57..11335d0378760 100644
--- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx
+++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx
@@ -17,6 +17,7 @@ import { capitalizeFirstLetterOnly } from '../../../../../shared/textUtil';
import { useUserContext } from '../../../../../context/useUserContext';
import { useEntityRegistry } from '../../../../../useEntityRegistry';
import EntityHeaderLoadingSection from './EntityHeaderLoadingSection';
+import { useIsEditableDatasetNameEnabled } from '../../../../../useAppConfig';
const TitleWrapper = styled.div`
display: flex;
@@ -71,6 +72,8 @@ export function getCanEditName(
return true; // TODO: add permissions for data products
case EntityType.BusinessAttribute:
return privileges?.manageBusinessAttributes;
+ case EntityType.Dataset:
+ return entityData?.privileges?.canEditProperties;
default:
return false;
}
@@ -94,8 +97,11 @@ export const EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEdi
const entityName = entityData?.name;
const subType = capitalizeFirstLetterOnly(entityData?.subTypes?.typeNames?.[0]) || undefined;
+ const isEditableDatasetNameEnabled = useIsEditableDatasetNameEnabled();
const canEditName =
- isNameEditable && getCanEditName(entityType, entityData, me?.platformPrivileges as PlatformPrivileges);
+ isEditableDatasetNameEnabled &&
+ isNameEditable &&
+ getCanEditName(entityType, entityData, me?.platformPrivileges as PlatformPrivileges);
const entityRegistry = useEntityRegistry();
return (
@@ -106,7 +112,7 @@ export const EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEdi
<>
                            [changed JSX element garbled during extraction: markup stripped]
{entityData?.deprecation?.deprecated && (
Date: Sat, 10 Aug 2024 01:18:11 +0900
Subject: [PATCH 16/72] fix: remove saxo (#11127)
---
README.md | 1 -
docs-website/adoptionStoriesIndexes.json | 11 -----
.../src/pages/_components/Logos/index.js | 42 +++++++++---------
.../logos/scrollingCompanies/saxo_bank.webp | Bin 3592 -> 0 bytes
4 files changed, 21 insertions(+), 33 deletions(-)
delete mode 100644 docs-website/static/img/logos/scrollingCompanies/saxo_bank.webp
diff --git a/README.md b/README.md
index b3c2e2d545941..3ac0668918f70 100644
--- a/README.md
+++ b/README.md
@@ -138,7 +138,6 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Peloton](https://www.onepeloton.com)
- [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/)
- [Razer](https://www.razer.com)
-- [Saxo Bank](https://www.home.saxo)
- [Showroomprive](https://www.showroomprive.com/)
- [SpotHero](https://spothero.com)
- [Stash](https://www.stash.com)
diff --git a/docs-website/adoptionStoriesIndexes.json b/docs-website/adoptionStoriesIndexes.json
index 3fe666ccf1c13..9697bdfcf39a9 100644
--- a/docs-website/adoptionStoriesIndexes.json
+++ b/docs-website/adoptionStoriesIndexes.json
@@ -77,17 +77,6 @@
"category": "B2B & B2C",
"description": "“We looked around for data catalog tool, and DataHub was a clear winner.”
Zynga levels up data management using DataHub, highlighting its role in enhancing data management, tracing data lineage, and ensuring data quality."
},
- {
- "name": "Saxo Bank",
- "slug": "saxo-bank",
- "imageUrl": "/img/logos/companies/saxobank.svg",
- "imageSize": "default",
- "link": "https://blog.datahubproject.io/enabling-data-discovery-in-a-data-mesh-the-saxo-journey-451b06969c8f",
- "linkType": "blog",
- "tagline": "Enabling Data Discovery in a Data Mesh",
- "category": "Financial & Fintech",
- "description": "Saxo Bank adopted DataHub to enhance data quality and streamline governance, facilitating efficient data management through self-service capabilities.
By integrating Apache Kafka and Snowflake with DataHub, the bank embraced Data Mesh principles to democratize data, support rapid growth, and improve business processes."
- },
{
"name": "MediaMarkt Saturn",
"slug": "mediamarkt-saturn",
diff --git a/docs-website/src/pages/_components/Logos/index.js b/docs-website/src/pages/_components/Logos/index.js
index 565f6e9a46fee..b17c072d02d57 100644
--- a/docs-website/src/pages/_components/Logos/index.js
+++ b/docs-website/src/pages/_components/Logos/index.js
@@ -168,36 +168,36 @@ export const CompanyLogos = () => (
modules={[Pagination]}
className={clsx("mySwiper", styles.companyWrapper)}
>
-        {companies.map((company, idx) => (
-          [slide/link/image JSX markup stripped during extraction]
-        ))}
+        {companies
+          .filter((company) => company.imageUrl) // Filter companies with imageUrl
+          .map((company, idx) => (
+            [slide/link/image JSX markup stripped during extraction]
+          ))}
);
diff --git a/docs-website/static/img/logos/scrollingCompanies/saxo_bank.webp b/docs-website/static/img/logos/scrollingCompanies/saxo_bank.webp
deleted file mode 100644
index a4c1aae73fe48b88946801f5713f15b10df3c177..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 3592
From 080f2a2100d2f42d9913a7bc85b6efb7d8e5f5b3 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 9 Aug 2024 11:40:03 -0500
Subject: [PATCH 17/72] feat(mcl-processor): Update mcl processor hooks
(#11134)
---
docs/how/kafka-config.md | 21 +++
.../kafka/MaeConsumerApplication.java | 7 +-
.../metadata/kafka/MCLKafkaListener.java | 103 +++++++++++++
.../kafka/MCLKafkaListenerRegistrar.java | 120 +++++++++++++++
.../kafka/MetadataChangeLogProcessor.java | 140 ------------------
.../kafka/hook/MetadataChangeLogHook.java | 8 +
.../kafka/hook/UpdateIndicesHook.java | 17 ++-
.../event/EntityChangeEventGeneratorHook.java | 34 +++--
.../kafka/hook/form/FormAssignmentHook.java | 26 +++-
.../hook/incident/IncidentsSummaryHook.java | 45 ++++--
.../ingestion/IngestionSchedulerHook.java | 30 ++--
.../hook/siblings/SiblingAssociationHook.java | 20 ++-
.../kafka/hook/spring/MCLGMSSpringTest.java | 16 +-
.../kafka/hook/spring/MCLMAESpringTest.java | 16 +-
.../MCLSpringCommonTestConfiguration.java | 9 +-
.../datahub/event/PlatformEventProcessor.java | 9 +-
.../src/main/resources/application.yaml | 10 ++
.../kafka/KafkaEventConsumerFactory.java | 2 +-
.../linkedin/gms/CommonApplicationConfig.java | 5 +-
19 files changed, 421 insertions(+), 217 deletions(-)
create mode 100644 metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java
create mode 100644 metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java
delete mode 100644 metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
diff --git a/docs/how/kafka-config.md b/docs/how/kafka-config.md
index 2f20e8b548f83..06c7418f16713 100644
--- a/docs/how/kafka-config.md
+++ b/docs/how/kafka-config.md
@@ -116,6 +116,27 @@ We've included an environment variable to customize the consumer group id, if yo
- `KAFKA_CONSUMER_GROUP_ID`: The name of the kafka consumer's group id.
+#### datahub-mae-consumer MCL Hooks
+
+By default, all MetadataChangeLog processing hooks execute as part of the same Kafka consumer group, based on the
+previously mentioned `KAFKA_CONSUMER_GROUP_ID`.
+
+The various MCL hooks can also be split into separate consumer groups, which allows for controlling the
+parallelization and prioritization of the hooks.
+
+For example, `UpdateIndicesHook` and `SiblingAssociationHook` processing can be delayed by the other hooks. Separating
+these hooks into their own consumer groups can reduce the latency they would otherwise inherit. The `application.yaml`
+configuration includes options for assigning a suffix to each hook's consumer group; see `consumerGroupSuffix`.
+
+| Environment Variable | Default | Description |
+|------------------------------------------------|---------|---------------------------------------------------------------------------------------------|
+| SIBLINGS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Siblings processing hook. Considered one of the primary hooks in the `datahub-mae-consumer` |
+| UPDATE_INDICES_CONSUMER_GROUP_SUFFIX | '' | Primary processing hook. |
+| INGESTION_SCHEDULER_HOOK_CONSUMER_GROUP_SUFFIX | '' | Scheduled ingestion hook. |
+| INCIDENTS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Incidents hook. |
+| ECE_CONSUMER_GROUP_SUFFIX | '' | Entity Change Event hook which publishes to the Platform Events topic. |
+| FORMS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Forms processing. |
+
## Applying Configurations
### Docker
diff --git a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java
index f6533a6ac1d8a..617bc8e0b7303 100644
--- a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java
+++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java
@@ -18,8 +18,6 @@
"com.linkedin.metadata.service",
"com.datahub.event",
"com.linkedin.gms.factory.kafka",
- "com.linkedin.gms.factory.kafka.common",
- "com.linkedin.gms.factory.kafka.schemaregistry",
"com.linkedin.metadata.boot.kafka",
"com.linkedin.metadata.kafka",
"com.linkedin.metadata.dao.producer",
@@ -34,7 +32,10 @@
"com.linkedin.gms.factory.context",
"com.linkedin.gms.factory.timeseries",
"com.linkedin.gms.factory.assertion",
- "com.linkedin.gms.factory.plugins"
+ "com.linkedin.gms.factory.plugins",
+ "com.linkedin.gms.factory.change",
+ "com.datahub.event.hook",
+ "com.linkedin.gms.factory.notifications"
},
excludeFilters = {
@ComponentScan.Filter(
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java
new file mode 100644
index 0000000000000..70b452722abc7
--- /dev/null
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java
@@ -0,0 +1,103 @@
+package com.linkedin.metadata.kafka;
+
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.Timer;
+import com.linkedin.metadata.EventUtils;
+import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
+import com.linkedin.metadata.utils.metrics.MetricUtils;
+import com.linkedin.mxe.MetadataChangeLog;
+import io.datahubproject.metadata.context.OperationContext;
+import java.util.List;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+
+@Slf4j
+public class MCLKafkaListener {
+ private static final Histogram kafkaLagStats =
+ MetricUtils.get()
+ .histogram(
+ MetricRegistry.name(
+ "com.linkedin.metadata.kafka.MetadataChangeLogProcessor", "kafkaLag"));
+
+ private final String consumerGroupId;
+  private final List<MetadataChangeLogHook> hooks;
+
+ public MCLKafkaListener(
+ OperationContext systemOperationContext,
+ String consumerGroup,
+      List<MetadataChangeLogHook> hooks) {
+ this.consumerGroupId = consumerGroup;
+ this.hooks = hooks;
+ this.hooks.forEach(hook -> hook.init(systemOperationContext));
+
+ log.info(
+ "Enabled MCL Hooks - Group: {} Hooks: {}",
+ consumerGroup,
+ hooks.stream().map(hook -> hook.getClass().getSimpleName()).collect(Collectors.toList()));
+ }
+
+  public void consume(final ConsumerRecord<String, GenericRecord> consumerRecord) {
+ try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) {
+ kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp());
+ final GenericRecord record = consumerRecord.value();
+ log.debug(
+ "Got MCL event consumer: {} key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}",
+ consumerGroupId,
+ consumerRecord.key(),
+ consumerRecord.topic(),
+ consumerRecord.partition(),
+ consumerRecord.offset(),
+ consumerRecord.serializedValueSize(),
+ consumerRecord.timestamp());
+ MetricUtils.counter(this.getClass(), consumerGroupId + "_received_mcl_count").inc();
+
+ MetadataChangeLog event;
+ try {
+ event = EventUtils.avroToPegasusMCL(record);
+ } catch (Exception e) {
+ MetricUtils.counter(
+ this.getClass(), consumerGroupId + "_avro_to_pegasus_conversion_failure")
+ .inc();
+ log.error("Error deserializing message due to: ", e);
+ log.error("Message: {}", record.toString());
+ return;
+ }
+
+ log.info(
+ "Invoking MCL hooks for consumer: {} urn: {}, aspect name: {}, entity type: {}, change type: {}",
+ consumerGroupId,
+ event.getEntityUrn(),
+ event.hasAspectName() ? event.getAspectName() : null,
+ event.hasEntityType() ? event.getEntityType() : null,
+ event.hasChangeType() ? event.getChangeType() : null);
+
+ // Here - plug in additional "custom processor hooks"
+ for (MetadataChangeLogHook hook : this.hooks) {
+ log.info(
+ "Invoking MCL hook {} for urn: {}",
+ hook.getClass().getSimpleName(),
+ event.getEntityUrn());
+ try (Timer.Context ignored =
+ MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency")
+ .time()) {
+ hook.invoke(event);
+ } catch (Exception e) {
+        // Just skip this hook and continue. Note that this represents "at-most-once"
+        // processing.
+ MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc();
+ log.error(
+ "Failed to execute MCL hook with name {}", hook.getClass().getCanonicalName(), e);
+ }
+ }
+ // TODO: Manually commit kafka offsets after full processing.
+ MetricUtils.counter(this.getClass(), consumerGroupId + "_consumed_mcl_count").inc();
+ log.info(
+ "Successfully completed MCL hooks for consumer: {} urn: {}",
+ consumerGroupId,
+ event.getEntityUrn());
+ }
+ }
+}
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java
new file mode 100644
index 0000000000000..fb2880f617d30
--- /dev/null
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java
@@ -0,0 +1,120 @@
+package com.linkedin.metadata.kafka;
+
+import com.linkedin.metadata.kafka.config.MetadataChangeLogProcessorCondition;
+import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
+import com.linkedin.mxe.Topics;
+import io.datahubproject.metadata.context.OperationContext;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Conditional;
+import org.springframework.kafka.annotation.EnableKafka;
+import org.springframework.kafka.config.KafkaListenerContainerFactory;
+import org.springframework.kafka.config.KafkaListenerEndpoint;
+import org.springframework.kafka.config.KafkaListenerEndpointRegistry;
+import org.springframework.kafka.config.MethodKafkaListenerEndpoint;
+import org.springframework.messaging.handler.annotation.support.DefaultMessageHandlerMethodFactory;
+import org.springframework.stereotype.Component;
+
+@Slf4j
+@EnableKafka
+@Component
+@Conditional(MetadataChangeLogProcessorCondition.class)
+public class MCLKafkaListenerRegistrar implements InitializingBean {
+
+ @Autowired
+ @Qualifier("systemOperationContext")
+ private OperationContext systemOperationContext;
+
+ @Autowired private KafkaListenerEndpointRegistry kafkaListenerEndpointRegistry;
+
+ @Autowired
+ @Qualifier("kafkaEventConsumer")
+  private KafkaListenerContainerFactory<?> kafkaListenerContainerFactory;
+
+ @Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}")
+ private String consumerGroupBase;
+
+ @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}")
+ private String mclVersionedTopicName;
+
+ @Value(
+ "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}")
+ private String mclTimeseriesTopicName;
+
+  @Autowired private List<MetadataChangeLogHook> metadataChangeLogHooks;
+
+ @Override
+ public void afterPropertiesSet() {
+    Map<String, List<MetadataChangeLogHook>> hookGroups =
+ getMetadataChangeLogHooks().stream()
+ .collect(Collectors.groupingBy(MetadataChangeLogHook::getConsumerGroupSuffix));
+
+ log.info(
+ "MetadataChangeLogProcessor Consumer Groups: {}",
+ hookGroups.keySet().stream().map(this::buildConsumerGroupName).collect(Collectors.toSet()));
+
+ hookGroups.forEach(
+ (key, hooks) -> {
+ KafkaListenerEndpoint kafkaListenerEndpoint =
+ createListenerEndpoint(
+ buildConsumerGroupName(key),
+ List.of(mclVersionedTopicName, mclTimeseriesTopicName),
+ hooks);
+ registerMCLKafkaListener(kafkaListenerEndpoint, true);
+ });
+ }
+
+  public List<MetadataChangeLogHook> getMetadataChangeLogHooks() {
+ return metadataChangeLogHooks.stream()
+ .filter(MetadataChangeLogHook::isEnabled)
+ .sorted(Comparator.comparing(MetadataChangeLogHook::executionOrder))
+ .toList();
+ }
+
+ @SneakyThrows
+ public void registerMCLKafkaListener(
+ KafkaListenerEndpoint kafkaListenerEndpoint, boolean startImmediately) {
+ kafkaListenerEndpointRegistry.registerListenerContainer(
+ kafkaListenerEndpoint, kafkaListenerContainerFactory, startImmediately);
+ }
+
+ private KafkaListenerEndpoint createListenerEndpoint(
+      String consumerGroupId, List<String> topics, List<MetadataChangeLogHook> hooks) {
+    MethodKafkaListenerEndpoint<String, GenericRecord> kafkaListenerEndpoint =
+ new MethodKafkaListenerEndpoint<>();
+ kafkaListenerEndpoint.setId(consumerGroupId);
+ kafkaListenerEndpoint.setGroupId(consumerGroupId);
+ kafkaListenerEndpoint.setAutoStartup(true);
+ kafkaListenerEndpoint.setTopics(topics.toArray(new String[topics.size()]));
+ kafkaListenerEndpoint.setMessageHandlerMethodFactory(new DefaultMessageHandlerMethodFactory());
+ kafkaListenerEndpoint.setBean(
+ new MCLKafkaListener(systemOperationContext, consumerGroupId, hooks));
+ try {
+ kafkaListenerEndpoint.setMethod(
+ MCLKafkaListener.class.getMethod("consume", ConsumerRecord.class));
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException(e);
+ }
+
+ return kafkaListenerEndpoint;
+ }
+
+ private String buildConsumerGroupName(@Nonnull String suffix) {
+ if (suffix.isEmpty()) {
+ return consumerGroupBase;
+ } else {
+ return String.join("-", consumerGroupBase, suffix);
+ }
+ }
+}
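
The registrar's `groupingBy` step determines how many listeners, and thus how many consumer groups, get created. Here is a small illustrative sketch of that partitioning (hypothetical suffix assignments, not part of the patch):

```java
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class HookGroupingSketch {
  public static void main(String[] args) {
    // Suffixes as a hypothetical deployment might configure them:
    // two primary hooks on the shared group, one isolated hook.
    Map<String, List<String>> hookGroups =
        Stream.of(
                Map.entry("", "UpdateIndicesHook"),
                Map.entry("", "SiblingAssociationHook"),
                Map.entry("ece", "EntityChangeEventGeneratorHook"))
            .collect(
                Collectors.groupingBy(
                    Map.Entry::getKey,
                    Collectors.mapping(Map.Entry::getValue, Collectors.toList())));
    // {""=[UpdateIndicesHook, SiblingAssociationHook], "ece"=[EntityChangeEventGeneratorHook]}
    // -> one listener on the base group, one on "<base>-ece"
    System.out.println(hookGroups);
  }
}
```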
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
deleted file mode 100644
index 6112ad798d73d..0000000000000
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package com.linkedin.metadata.kafka;
-
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.MetricRegistry;
-import com.codahale.metrics.Timer;
-import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory;
-import com.linkedin.metadata.EventUtils;
-import com.linkedin.metadata.kafka.config.MetadataChangeLogProcessorCondition;
-import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
-import com.linkedin.metadata.kafka.hook.UpdateIndicesHook;
-import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook;
-import com.linkedin.metadata.kafka.hook.form.FormAssignmentHook;
-import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook;
-import com.linkedin.metadata.kafka.hook.ingestion.IngestionSchedulerHook;
-import com.linkedin.metadata.kafka.hook.siblings.SiblingAssociationHook;
-import com.linkedin.metadata.utils.metrics.MetricUtils;
-import com.linkedin.mxe.MetadataChangeLog;
-import com.linkedin.mxe.Topics;
-import io.datahubproject.metadata.context.OperationContext;
-import java.util.Comparator;
-import java.util.List;
-import java.util.stream.Collectors;
-import lombok.Getter;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.context.annotation.Conditional;
-import org.springframework.context.annotation.Import;
-import org.springframework.kafka.annotation.EnableKafka;
-import org.springframework.kafka.annotation.KafkaListener;
-import org.springframework.stereotype.Component;
-
-@Slf4j
-@Component
-@Conditional(MetadataChangeLogProcessorCondition.class)
-@Import({
- UpdateIndicesHook.class,
- IngestionSchedulerHook.class,
- EntityChangeEventGeneratorHook.class,
- KafkaEventConsumerFactory.class,
- SiblingAssociationHook.class,
- FormAssignmentHook.class,
- IncidentsSummaryHook.class,
-})
-@EnableKafka
-public class MetadataChangeLogProcessor {
-
-  @Getter private final List<MetadataChangeLogHook> hooks;
- private final Histogram kafkaLagStats =
- MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag"));
-
- @Autowired
- public MetadataChangeLogProcessor(
- @Qualifier("systemOperationContext") OperationContext systemOperationContext,
-      List<MetadataChangeLogHook> metadataChangeLogHooks) {
- this.hooks =
- metadataChangeLogHooks.stream()
- .filter(MetadataChangeLogHook::isEnabled)
- .sorted(Comparator.comparing(MetadataChangeLogHook::executionOrder))
- .collect(Collectors.toList());
- log.info(
- "Enabled hooks: {}",
- this.hooks.stream()
- .map(hook -> hook.getClass().getSimpleName())
- .collect(Collectors.toList()));
- this.hooks.forEach(hook -> hook.init(systemOperationContext));
- }
-
- @KafkaListener(
- id = "${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}",
- topics = {
- "${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}",
- "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}"
- },
- containerFactory = "kafkaEventConsumer")
-  public void consume(final ConsumerRecord<String, GenericRecord> consumerRecord) {
- try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) {
- kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp());
- final GenericRecord record = consumerRecord.value();
- log.info(
- "Got MCL event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}",
- consumerRecord.key(),
- consumerRecord.topic(),
- consumerRecord.partition(),
- consumerRecord.offset(),
- consumerRecord.serializedValueSize(),
- consumerRecord.timestamp());
- MetricUtils.counter(this.getClass(), "received_mcl_count").inc();
-
- MetadataChangeLog event;
- try {
- event = EventUtils.avroToPegasusMCL(record);
- log.debug(
- "Successfully converted Avro MCL to Pegasus MCL. urn: {}, key: {}",
- event.getEntityUrn(),
- event.getEntityKeyAspect());
- } catch (Exception e) {
- MetricUtils.counter(this.getClass(), "avro_to_pegasus_conversion_failure").inc();
- log.error("Error deserializing message due to: ", e);
- log.error("Message: {}", record.toString());
- return;
- }
-
- log.info(
- "Invoking MCL hooks for urn: {}, aspect name: {}, entity type: {}, change type: {}",
- event.getEntityUrn(),
- event.hasAspectName() ? event.getAspectName() : null,
- event.hasEntityType() ? event.getEntityType() : null,
- event.hasChangeType() ? event.getChangeType() : null);
-
- // Here - plug in additional "custom processor hooks"
- for (MetadataChangeLogHook hook : this.hooks) {
- if (!hook.isEnabled()) {
- log.info(String.format("Skipping disabled hook %s", hook.getClass()));
- continue;
- }
- log.info(
- "Invoking MCL hook {} for urn: {}",
- hook.getClass().getSimpleName(),
- event.getEntityUrn());
- try (Timer.Context ignored =
- MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency")
- .time()) {
- hook.invoke(event);
- } catch (Exception e) {
- // Just skip this hook and continue. - Note that this represents "at most once"//
- // processing.
- MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc();
- log.error(
- "Failed to execute MCL hook with name {}", hook.getClass().getCanonicalName(), e);
- }
- }
- // TODO: Manually commit kafka offsets after full processing.
- MetricUtils.counter(this.getClass(), "consumed_mcl_count").inc();
- log.info("Successfully completed MCL hooks for urn: {}", event.getEntityUrn());
- }
- }
-}
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java
index 145d1ded724cc..06a184c9f89f9 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java
@@ -18,6 +18,14 @@ default MetadataChangeLogHook init(@Nonnull OperationContext systemOperationCont
return this;
}
+ /**
+ * Suffix for the consumer group
+ *
+ * @return suffix
+ */
+ @Nonnull
+ String getConsumerGroupSuffix();
+
/**
* Return whether the hook is enabled or not. If not enabled, the below invoke method is not
* triggered
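
For orientation, here is a minimal sketch (not part of the patch) of a custom hook written against the interface above. The class name, suffix, and log line are illustrative, and it assumes the interface's remaining methods (for example `executionOrder`) keep default implementations, as the default `init` shown above suggests.

```java
package com.example.hooks; // hypothetical package

import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
import com.linkedin.mxe.MetadataChangeLog;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

@Slf4j
@Component
public class AuditTrailHook implements MetadataChangeLogHook {

  // A non-empty suffix isolates this hook in its own consumer group,
  // e.g. "generic-mae-consumer-job-client-audit-trail".
  @Nonnull
  @Override
  public String getConsumerGroupSuffix() {
    return "audit-trail";
  }

  @Override
  public boolean isEnabled() {
    return true;
  }

  @Override
  public void invoke(@Nonnull MetadataChangeLog event) {
    // Failures thrown here are swallowed by MCLKafkaListener (at-most-once processing).
    log.info("MCL observed for urn {}", event.getEntityUrn());
  }
}
```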
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java
index a0e304b26ea60..bd804b0f4424c 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java
@@ -2,6 +2,7 @@
import static com.linkedin.metadata.Constants.*;
+import com.google.common.annotations.VisibleForTesting;
import com.linkedin.gms.factory.common.GraphServiceFactory;
import com.linkedin.gms.factory.common.SystemMetadataServiceFactory;
import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory;
@@ -12,7 +13,9 @@
import com.linkedin.mxe.MetadataChangeLog;
import io.datahubproject.metadata.context.OperationContext;
import javax.annotation.Nonnull;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Import;
import org.springframework.stereotype.Component;
@@ -34,15 +37,27 @@ public class UpdateIndicesHook implements MetadataChangeLogHook {
private final boolean isEnabled;
private final boolean reprocessUIEvents;
private OperationContext systemOperationContext;
+ @Getter private final String consumerGroupSuffix;
+ @Autowired
public UpdateIndicesHook(
UpdateIndicesService updateIndicesService,
@Nonnull @Value("${updateIndices.enabled:true}") Boolean isEnabled,
@Nonnull @Value("${featureFlags.preProcessHooks.reprocessEnabled:false}")
- Boolean reprocessUIEvents) {
+ Boolean reprocessUIEvents,
+ @Nonnull @Value("${updateIndices.consumerGroupSuffix}") String consumerGroupSuffix) {
this.updateIndicesService = updateIndicesService;
this.isEnabled = isEnabled;
this.reprocessUIEvents = reprocessUIEvents;
+ this.consumerGroupSuffix = consumerGroupSuffix;
+ }
+
+ @VisibleForTesting
+ public UpdateIndicesHook(
+ UpdateIndicesService updateIndicesService,
+ @Nonnull Boolean isEnabled,
+ @Nonnull Boolean reprocessUIEvents) {
+ this(updateIndicesService, isEnabled, reprocessUIEvents, "");
}
@Override
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java
index 8dc98d77233ce..59d068a46d8c6 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHook.java
@@ -1,5 +1,6 @@
package com.linkedin.metadata.kafka.hook.event;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.Urn;
@@ -29,6 +30,7 @@
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
@@ -78,10 +80,11 @@ public class EntityChangeEventGeneratorHook implements MetadataChangeLogHook {
  private static final Set<String> SUPPORTED_OPERATIONS =
ImmutableSet.of("CREATE", "UPSERT", "DELETE");
- private final EntityChangeEventGeneratorRegistry _entityChangeEventGeneratorRegistry;
+ private final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry;
private final OperationContext systemOperationContext;
- private final SystemEntityClient _entityClient;
- private final Boolean _isEnabled;
+ private final SystemEntityClient entityClient;
+ private final Boolean isEnabled;
+ @Getter private final String consumerGroupSuffix;
@Autowired
public EntityChangeEventGeneratorHook(
@@ -89,17 +92,28 @@ public EntityChangeEventGeneratorHook(
@Nonnull @Qualifier("entityChangeEventGeneratorRegistry")
final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry,
@Nonnull final SystemEntityClient entityClient,
- @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled) {
+ @Nonnull @Value("${entityChangeEvents.enabled:true}") Boolean isEnabled,
+ @Nonnull @Value("${entityChangeEvents.consumerGroupSuffix}") String consumerGroupSuffix) {
this.systemOperationContext = systemOperationContext;
- _entityChangeEventGeneratorRegistry =
+ this.entityChangeEventGeneratorRegistry =
Objects.requireNonNull(entityChangeEventGeneratorRegistry);
- _entityClient = Objects.requireNonNull(entityClient);
- _isEnabled = isEnabled;
+ this.entityClient = Objects.requireNonNull(entityClient);
+ this.isEnabled = isEnabled;
+ this.consumerGroupSuffix = consumerGroupSuffix;
+ }
+
+ @VisibleForTesting
+ public EntityChangeEventGeneratorHook(
+ @Nonnull OperationContext systemOperationContext,
+ @Nonnull final EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry,
+ @Nonnull final SystemEntityClient entityClient,
+ @Nonnull Boolean isEnabled) {
+ this(systemOperationContext, entityChangeEventGeneratorRegistry, entityClient, isEnabled, "");
}
@Override
public boolean isEnabled() {
- return _isEnabled;
+ return isEnabled;
}
@Override
@@ -166,7 +180,7 @@ private List<ChangeEvent> generateChangeEvents(
@Nonnull final Aspect to,
@Nonnull AuditStamp auditStamp) {
     final List<EntityChangeEventGenerator<Aspect>> entityChangeEventGenerators =
- _entityChangeEventGeneratorRegistry.getEntityChangeEventGenerators(aspectName).stream()
+ entityChangeEventGeneratorRegistry.getEntityChangeEventGenerators(aspectName).stream()
// Note: Assumes that correct types have been registered for the aspect.
             .map(changeEventGenerator -> (EntityChangeEventGenerator<Aspect>) changeEventGenerator)
.collect(Collectors.toList());
@@ -186,7 +200,7 @@ private boolean isEligibleForProcessing(final MetadataChangeLog log) {
private void emitPlatformEvent(
@Nonnull final PlatformEvent event, @Nonnull final String partitioningKey) throws Exception {
- _entityClient.producePlatformEvent(
+ entityClient.producePlatformEvent(
systemOperationContext, Constants.CHANGE_EVENT_PLATFORM_EVENT_NAME, partitioningKey, event);
}
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java
index 8d093fe0b8a12..063fa6de92c83 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/form/FormAssignmentHook.java
@@ -2,6 +2,7 @@
import static com.linkedin.metadata.Constants.*;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.form.DynamicFormAssignment;
@@ -15,6 +16,7 @@
import java.util.Objects;
import java.util.Set;
import javax.annotation.Nonnull;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
@@ -53,17 +55,25 @@ public class FormAssignmentHook implements MetadataChangeLogHook {
ImmutableSet.of(
ChangeType.UPSERT, ChangeType.CREATE, ChangeType.CREATE_ENTITY, ChangeType.RESTATE);
- private final FormService _formService;
- private final boolean _isEnabled;
+ private final FormService formService;
+ private final boolean isEnabled;
private OperationContext systemOperationContext;
+ @Getter private final String consumerGroupSuffix;
@Autowired
public FormAssignmentHook(
@Nonnull final FormService formService,
- @Nonnull @Value("${forms.hook.enabled:true}") Boolean isEnabled) {
- _formService = Objects.requireNonNull(formService, "formService is required");
- _isEnabled = isEnabled;
+ @Nonnull @Value("${forms.hook.enabled:true}") Boolean isEnabled,
+ @Nonnull @Value("${forms.hook.consumerGroupSuffix}") String consumerGroupSuffix) {
+ this.formService = Objects.requireNonNull(formService, "formService is required");
+ this.isEnabled = isEnabled;
+ this.consumerGroupSuffix = consumerGroupSuffix;
+ }
+
+ @VisibleForTesting
+ public FormAssignmentHook(@Nonnull final FormService formService, @Nonnull Boolean isEnabled) {
+ this(formService, isEnabled, "");
}
@Override
@@ -74,12 +84,12 @@ public FormAssignmentHook init(@Nonnull OperationContext systemOperationContext)
@Override
public boolean isEnabled() {
- return _isEnabled;
+ return isEnabled;
}
@Override
public void invoke(@Nonnull final MetadataChangeLog event) {
- if (_isEnabled && isEligibleForProcessing(event)) {
+ if (isEnabled && isEligibleForProcessing(event)) {
if (isFormDynamicFilterUpdated(event)) {
handleFormFilterUpdated(event);
}
@@ -96,7 +106,7 @@ private void handleFormFilterUpdated(@Nonnull final MetadataChangeLog event) {
DynamicFormAssignment.class);
    // 2. Register an automation to assign it.
- _formService.upsertFormAssignmentRunner(
+ formService.upsertFormAssignmentRunner(
systemOperationContext, event.getEntityUrn(), formFilters);
}
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java
index 7c03a11a81f7a..5483fed9116e1 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/incident/IncidentsSummaryHook.java
@@ -2,6 +2,7 @@
import static com.linkedin.metadata.Constants.*;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.IncidentSummaryDetails;
import com.linkedin.common.IncidentSummaryDetailsArray;
@@ -27,6 +28,7 @@
import java.util.Objects;
import java.util.Set;
import javax.annotation.Nonnull;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
@@ -57,20 +59,31 @@ public class IncidentsSummaryHook implements MetadataChangeLogHook {
ImmutableSet.of(INCIDENT_INFO_ASPECT_NAME, STATUS_ASPECT_NAME);
private OperationContext systemOperationContext;
- private final IncidentService _incidentService;
- private final boolean _isEnabled;
+ private final IncidentService incidentService;
+ private final boolean isEnabled;
+ @Getter private final String consumerGroupSuffix;
/** Max number of incidents to allow in incident summary, limited to prevent HTTP errors */
- private final int _maxIncidentHistory;
+ private final int maxIncidentHistory;
@Autowired
public IncidentsSummaryHook(
@Nonnull final IncidentService incidentService,
- @Nonnull @Value("${incidents.hook.enabled:true}") Boolean isEnabled,
- @Nonnull @Value("${incidents.hook.maxIncidentHistory:100}") Integer maxIncidentHistory) {
- _incidentService = Objects.requireNonNull(incidentService, "incidentService is required");
- _isEnabled = isEnabled;
- _maxIncidentHistory = maxIncidentHistory;
+ @Nonnull @Value("${incidents.hook.enabled}") Boolean isEnabled,
+ @Nonnull @Value("${incidents.hook.maxIncidentHistory}") Integer maxIncidentHistory,
+ @Nonnull @Value("${incidents.hook.consumerGroupSuffix}") String consumerGroupSuffix) {
+ this.incidentService = Objects.requireNonNull(incidentService, "incidentService is required");
+ this.isEnabled = isEnabled;
+ this.maxIncidentHistory = maxIncidentHistory;
+ this.consumerGroupSuffix = consumerGroupSuffix;
+ }
+
+ @VisibleForTesting
+ public IncidentsSummaryHook(
+ @Nonnull final IncidentService incidentService,
+ @Nonnull Boolean isEnabled,
+ @Nonnull Integer maxIncidentHistory) {
+ this(incidentService, isEnabled, maxIncidentHistory, "");
}
@Override
@@ -81,12 +94,12 @@ public IncidentsSummaryHook init(@Nonnull OperationContext systemOperationContex
@Override
public boolean isEnabled() {
- return _isEnabled;
+ return isEnabled;
}
@Override
public void invoke(@Nonnull final MetadataChangeLog event) {
- if (_isEnabled && isEligibleForProcessing(event)) {
+ if (isEnabled && isEligibleForProcessing(event)) {
log.debug("Urn {} received by Incident Summary Hook.", event.getEntityUrn());
final Urn urn = HookUtils.getUrnFromEvent(event, systemOperationContext.getEntityRegistry());
// Handle the deletion case.
@@ -104,7 +117,7 @@ public void invoke(@Nonnull final MetadataChangeLog event) {
private void handleIncidentSoftDeleted(@Nonnull final Urn incidentUrn) {
// 1. Fetch incident info.
IncidentInfo incidentInfo =
- _incidentService.getIncidentInfo(systemOperationContext, incidentUrn);
+ incidentService.getIncidentInfo(systemOperationContext, incidentUrn);
// 2. Retrieve associated urns.
if (incidentInfo != null) {
@@ -127,7 +140,7 @@ private void handleIncidentSoftDeleted(@Nonnull final Urn incidentUrn) {
private void handleIncidentUpdated(@Nonnull final Urn incidentUrn) {
// 1. Fetch incident info + status
IncidentInfo incidentInfo =
- _incidentService.getIncidentInfo(systemOperationContext, incidentUrn);
+ incidentService.getIncidentInfo(systemOperationContext, incidentUrn);
// 2. Retrieve associated urns.
if (incidentInfo != null) {
@@ -179,14 +192,14 @@ private void addIncidentToSummary(
IncidentsSummaryUtils.removeIncidentFromResolvedSummary(incidentUrn, summary);
// Then, add to active.
- IncidentsSummaryUtils.addIncidentToActiveSummary(details, summary, _maxIncidentHistory);
+ IncidentsSummaryUtils.addIncidentToActiveSummary(details, summary, maxIncidentHistory);
} else if (IncidentState.RESOLVED.equals(status.getState())) {
// First, ensure this isn't in any summaries anymore.
IncidentsSummaryUtils.removeIncidentFromActiveSummary(incidentUrn, summary);
// Then, add to resolved.
- IncidentsSummaryUtils.addIncidentToResolvedSummary(details, summary, _maxIncidentHistory);
+ IncidentsSummaryUtils.addIncidentToResolvedSummary(details, summary, maxIncidentHistory);
}
// 3. Emit the change back!
@@ -196,7 +209,7 @@ private void addIncidentToSummary(
@Nonnull
private IncidentsSummary getIncidentsSummary(@Nonnull final Urn entityUrn) {
IncidentsSummary maybeIncidentsSummary =
- _incidentService.getIncidentsSummary(systemOperationContext, entityUrn);
+ incidentService.getIncidentsSummary(systemOperationContext, entityUrn);
return maybeIncidentsSummary == null
? new IncidentsSummary()
.setResolvedIncidentDetails(new IncidentSummaryDetailsArray())
@@ -260,7 +273,7 @@ private boolean isIncidentUpdate(@Nonnull final MetadataChangeLog event) {
private void updateIncidentSummary(
@Nonnull final Urn entityUrn, @Nonnull final IncidentsSummary newSummary) {
try {
- _incidentService.updateIncidentsSummary(systemOperationContext, entityUrn, newSummary);
+ incidentService.updateIncidentsSummary(systemOperationContext, entityUrn, newSummary);
} catch (Exception e) {
log.error(
String.format(
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java
index c13f0f75708f7..5569fade7e6eb 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/ingestion/IngestionSchedulerHook.java
@@ -15,6 +15,7 @@
import com.linkedin.mxe.MetadataChangeLog;
import io.datahubproject.metadata.context.OperationContext;
import javax.annotation.Nonnull;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
@@ -29,27 +30,36 @@
@Component
@Import({EntityRegistryFactory.class, IngestionSchedulerFactory.class})
public class IngestionSchedulerHook implements MetadataChangeLogHook {
- private final IngestionScheduler _scheduler;
- private final boolean _isEnabled;
+ private final IngestionScheduler scheduler;
+ private final boolean isEnabled;
private OperationContext systemOperationContext;
+ @Getter private final String consumerGroupSuffix;
@Autowired
public IngestionSchedulerHook(
@Nonnull final IngestionScheduler scheduler,
- @Nonnull @Value("${ingestionScheduler.enabled:true}") Boolean isEnabled) {
- _scheduler = scheduler;
- _isEnabled = isEnabled;
+ @Nonnull @Value("${ingestionScheduler.enabled:true}") Boolean isEnabled,
+ @Nonnull @Value("${ingestionScheduler.consumerGroupSuffix}") String consumerGroupSuffix) {
+ this.scheduler = scheduler;
+ this.isEnabled = isEnabled;
+ this.consumerGroupSuffix = consumerGroupSuffix;
+ }
+
+ @VisibleForTesting
+ public IngestionSchedulerHook(
+ @Nonnull final IngestionScheduler scheduler, @Nonnull Boolean isEnabled) {
+ this(scheduler, isEnabled, "");
}
@Override
public boolean isEnabled() {
- return _isEnabled;
+ return isEnabled;
}
@Override
public IngestionSchedulerHook init(@Nonnull OperationContext systemOperationContext) {
this.systemOperationContext = systemOperationContext;
- _scheduler.init();
+ scheduler.init();
return this;
}
@@ -66,11 +76,11 @@ public void invoke(@Nonnull MetadataChangeLog event) {
final Urn urn = getUrnFromEvent(event);
if (ChangeType.DELETE.equals(event.getChangeType())) {
- _scheduler.unscheduleNextIngestionSourceExecution(urn);
+ scheduler.unscheduleNextIngestionSourceExecution(urn);
} else {
// Update the scheduler to reflect the latest changes.
final DataHubIngestionSourceInfo info = getInfoFromEvent(event);
- _scheduler.scheduleNextIngestionSourceExecution(urn, info);
+ scheduler.scheduleNextIngestionSourceExecution(urn, info);
}
}
}
@@ -138,6 +148,6 @@ private DataHubIngestionSourceInfo getInfoFromEvent(final MetadataChangeLog even
@VisibleForTesting
IngestionScheduler scheduler() {
- return _scheduler;
+ return scheduler;
}
}
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java
index f068679da7757..bbe0feed7de11 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java
@@ -41,6 +41,7 @@
import java.util.List;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
+import lombok.Getter;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
@@ -70,17 +71,28 @@ public class SiblingAssociationHook implements MetadataChangeLogHook {
private final SystemEntityClient systemEntityClient;
private final EntitySearchService entitySearchService;
- private final boolean _isEnabled;
+ private final boolean isEnabled;
private OperationContext systemOperationContext;
+ @Getter private final String consumerGroupSuffix;
@Autowired
public SiblingAssociationHook(
@Nonnull final SystemEntityClient systemEntityClient,
@Nonnull final EntitySearchService searchService,
- @Nonnull @Value("${siblings.enabled:true}") Boolean isEnabled) {
+ @Nonnull @Value("${siblings.enabled:true}") Boolean isEnabled,
+ @Nonnull @Value("${siblings.consumerGroupSuffix}") String consumerGroupSuffix) {
this.systemEntityClient = systemEntityClient;
entitySearchService = searchService;
- _isEnabled = isEnabled;
+ this.isEnabled = isEnabled;
+ this.consumerGroupSuffix = consumerGroupSuffix;
+ }
+
+ @VisibleForTesting
+ public SiblingAssociationHook(
+ @Nonnull final SystemEntityClient systemEntityClient,
+ @Nonnull final EntitySearchService searchService,
+ @Nonnull Boolean isEnabled) {
+ this(systemEntityClient, searchService, isEnabled, "");
}
@Value("${siblings.enabled:false}")
@@ -99,7 +111,7 @@ public SiblingAssociationHook init(@Nonnull OperationContext systemOperationCont
@Override
public boolean isEnabled() {
- return _isEnabled;
+ return isEnabled;
}
@Override
diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java
index c2a8de161eafe..10f149e606295 100644
--- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java
+++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java
@@ -3,7 +3,7 @@
import static org.testng.AssertJUnit.*;
import com.linkedin.gms.factory.config.ConfigurationProvider;
-import com.linkedin.metadata.kafka.MetadataChangeLogProcessor;
+import com.linkedin.metadata.kafka.MCLKafkaListenerRegistrar;
import com.linkedin.metadata.kafka.hook.UpdateIndicesHook;
import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook;
import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook;
@@ -35,23 +35,23 @@ public class MCLGMSSpringTest extends AbstractTestNGSpringContextTests {
@Test
public void testHooks() {
- MetadataChangeLogProcessor metadataChangeLogProcessor =
- applicationContext.getBean(MetadataChangeLogProcessor.class);
+ MCLKafkaListenerRegistrar registrar =
+ applicationContext.getBean(MCLKafkaListenerRegistrar.class);
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.noneMatch(hook -> hook instanceof IngestionSchedulerHook));
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.anyMatch(hook -> hook instanceof UpdateIndicesHook));
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.anyMatch(hook -> hook instanceof SiblingAssociationHook));
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.anyMatch(hook -> hook instanceof EntityChangeEventGeneratorHook));
assertEquals(
1,
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.filter(hook -> hook instanceof IncidentsSummaryHook)
.count());
}
diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java
index 23de7707cc571..2049e974999b1 100644
--- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java
+++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java
@@ -4,7 +4,7 @@
import static org.testng.AssertJUnit.assertTrue;
import com.linkedin.gms.factory.config.ConfigurationProvider;
-import com.linkedin.metadata.kafka.MetadataChangeLogProcessor;
+import com.linkedin.metadata.kafka.MCLKafkaListenerRegistrar;
import com.linkedin.metadata.kafka.hook.UpdateIndicesHook;
import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook;
import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook;
@@ -33,23 +33,23 @@ public class MCLMAESpringTest extends AbstractTestNGSpringContextTests {
@Test
public void testHooks() {
- MetadataChangeLogProcessor metadataChangeLogProcessor =
- applicationContext.getBean(MetadataChangeLogProcessor.class);
+ MCLKafkaListenerRegistrar registrar =
+ applicationContext.getBean(MCLKafkaListenerRegistrar.class);
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.noneMatch(hook -> hook instanceof IngestionSchedulerHook));
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.anyMatch(hook -> hook instanceof UpdateIndicesHook));
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.anyMatch(hook -> hook instanceof SiblingAssociationHook));
assertTrue(
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.anyMatch(hook -> hook instanceof EntityChangeEventGeneratorHook));
assertEquals(
1,
- metadataChangeLogProcessor.getHooks().stream()
+ registrar.getMetadataChangeLogHooks().stream()
.filter(hook -> hook instanceof IncidentsSummaryHook)
.count());
}
diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java
index f6f71a12a6951..68768051eccad 100644
--- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java
+++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java
@@ -34,10 +34,13 @@
@ComponentScan(
basePackages = {
"com.linkedin.metadata.kafka",
- "com.linkedin.gms.factory.kafka.common",
- "com.linkedin.gms.factory.kafka.schemaregistry",
+ "com.linkedin.gms.factory.kafka",
"com.linkedin.gms.factory.entity.update.indices",
- "com.linkedin.gms.factory.timeline.eventgenerator"
+ "com.linkedin.gms.factory.timeline.eventgenerator",
+ "com.linkedin.metadata.dao.producer",
+ "com.linkedin.gms.factory.change",
+ "com.datahub.event.hook",
+ "com.linkedin.gms.factory.notifications"
})
public class MCLSpringCommonTestConfiguration {
diff --git a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java
index c4116b314254c..358a2ac0c2ee3 100644
--- a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java
+++ b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java
@@ -3,9 +3,7 @@
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
-import com.datahub.event.hook.BusinessAttributeUpdateHook;
import com.datahub.event.hook.PlatformEventHook;
-import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory;
import com.linkedin.metadata.EventUtils;
import com.linkedin.metadata.utils.metrics.MetricUtils;
import com.linkedin.mxe.PlatformEvent;
@@ -21,7 +19,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Conditional;
-import org.springframework.context.annotation.Import;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.stereotype.Component;
@@ -29,7 +26,6 @@
@Slf4j
@Component
@Conditional(PlatformEventProcessorCondition.class)
-@Import({BusinessAttributeUpdateHook.class, KafkaEventConsumerFactory.class})
@EnableKafka
public class PlatformEventProcessor {
@@ -49,6 +45,11 @@ public PlatformEventProcessor(
platformEventHooks.stream()
.filter(PlatformEventHook::isEnabled)
.collect(Collectors.toList());
+ log.info(
+ "Enabled platform hooks: {}",
+ this.hooks.stream()
+ .map(hook -> hook.getClass().getSimpleName())
+ .collect(Collectors.toList()));
this.hooks.forEach(PlatformEventHook::init);
}
diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml
index 2514060ff2d61..5b3673ddca52c 100644
--- a/metadata-service/configuration/src/main/resources/application.yaml
+++ b/metadata-service/configuration/src/main/resources/application.yaml
@@ -296,10 +296,18 @@ metadataTests:
siblings:
enabled: ${ENABLE_SIBLING_HOOK:true} # enable to turn on automatic sibling associations for dbt
+ consumerGroupSuffix: ${SIBLINGS_HOOK_CONSUMER_GROUP_SUFFIX:}
updateIndices:
enabled: ${ENABLE_UPDATE_INDICES_HOOK:true}
+ consumerGroupSuffix: ${UPDATE_INDICES_CONSUMER_GROUP_SUFFIX:}
ingestionScheduler:
enabled: ${ENABLE_INGESTION_SCHEDULER_HOOK:true} # enable to execute ingestion scheduling
+ consumerGroupSuffix: ${INGESTION_SCHEDULER_HOOK_CONSUMER_GROUP_SUFFIX:}
+incidents:
+ hook:
+ enabled: ${ENABLE_INCIDENTS_HOOK:true}
+ maxIncidentHistory: ${MAX_INCIDENT_HISTORY:100}
+ consumerGroupSuffix: ${INCIDENTS_HOOK_CONSUMER_GROUP_SUFFIX:}
bootstrap:
upgradeDefaultBrowsePaths:
@@ -376,6 +384,7 @@ featureFlags:
entityChangeEvents:
enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true}
+ consumerGroupSuffix: ${ECE_CONSUMER_GROUP_SUFFIX:}
views:
enabled: ${VIEWS_ENABLED:true}
@@ -460,6 +469,7 @@ springdoc.api-docs.groups.enabled: true
forms:
hook:
enabled: { $FORMS_HOOK_ENABLED:true }
+ consumerGroupSuffix: ${FORMS_HOOK_CONSUMER_GROUP_SUFFIX:}
businessAttribute:
fetchRelatedEntitiesCount: ${BUSINESS_ATTRIBUTE_RELATED_ENTITIES_COUNT:20000}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
index 9501b03482d04..aecb4f0afb12c 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
@@ -96,7 +96,7 @@ private static Map buildCustomizedProperties(
}
@Bean(name = "kafkaEventConsumer")
-  protected KafkaListenerContainerFactory<?> createInstance(
+  protected KafkaListenerContainerFactory<?> kafkaEventConsumer(
       @Qualifier("kafkaConsumerFactory")
           DefaultKafkaConsumerFactory<String, GenericRecord> kafkaConsumerFactory,
@Qualifier("configurationProvider") ConfigurationProvider configurationProvider) {
diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java
index bc623c3cc983c..e47a2b4e278e4 100644
--- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java
+++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java
@@ -37,7 +37,10 @@
"com.linkedin.gms.factory.search",
"com.linkedin.gms.factory.secret",
"com.linkedin.gms.factory.timeseries",
- "com.linkedin.gms.factory.plugins"
+ "com.linkedin.gms.factory.plugins",
+ "com.linkedin.gms.factory.change",
+ "com.datahub.event.hook",
+ "com.linkedin.gms.factory.notifications"
})
@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class)
@Configuration
From 573c1cb8407c2a5d152e5abb6b7d9f012eea75cb Mon Sep 17 00:00:00 2001
From: David Leifker
Date: Fri, 9 Aug 2024 12:38:44 -0500
Subject: [PATCH 18/72] fix(openapi): fix openapi v2 endpoints & v3
documentation update
---
docs/api/tutorials/structured-properties.md | 204 ++++++++++--------
.../controller/GenericEntitiesController.java | 23 +-
.../v2/controller/EntityController.java | 25 +++
.../v3/controller/EntityController.java | 27 +++
4 files changed, 169 insertions(+), 110 deletions(-)
diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md
index 6f6c6541554d9..00e992f2bd0bb 100644
--- a/docs/api/tutorials/structured-properties.md
+++ b/docs/api/tutorials/structured-properties.md
@@ -158,29 +158,37 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
+ "value": {
"qualifiedName": "io.acryl.privacy.retentionTime",
- "valueType": "urn:li:dataType:datahub.number",
- "description": "Retention Time is used to figure out how long to retain records in a dataset",
- "displayName": "Retention Time",
- "cardinality": "MULTIPLE",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset",
- "urn:li:entityType:datahub.dataFlow"
- ],
- "allowedValues": [
- {
- "value": {"double": 30},
- "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
- },
- {
- "value": {"double": 60},
- "description": "Use this for datasets that drive monthly reporting but contain pii"
- },
- {
- "value": {"double": 365},
- "description": "Use this for non-sensitive data that can be retained for longer"
- }
- ]
+ "valueType": "urn:li:dataType:datahub.number",
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "displayName": "Retention Time",
+ "cardinality": "MULTIPLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "allowedValues": [
+ {
+ "value": {
+ "double": 30
+ },
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {
+ "double": 60
+ },
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {
+ "double": 365
+ },
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
+ }
}' | jq
```
@@ -474,14 +482,16 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "properties": [
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
- "values": [
- {"double": 60.0}
- ]
- }
- ]
+ "value": {
+ "properties": [
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "values": [
+ {"double": 60.0}
+ ]
+ }
+ ]
+ }
}' | jq
```
Example Response:
@@ -627,23 +637,25 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "qualifiedName": "io.acryl.privacy.retentionTime02",
- "displayName": "Retention Time 02",
- "valueType": "urn:li:dataType:datahub.string",
- "allowedValues": [
- {
- "value": {"string": "foo2"},
- "description": "test foo2 value"
- },
- {
- "value": {"string": "bar2"},
- "description": "test bar2 value"
- }
- ],
- "cardinality": "SINGLE",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset"
- ]
+ "value": {
+ "qualifiedName": "io.acryl.privacy.retentionTime02",
+ "displayName": "Retention Time 02",
+ "valueType": "urn:li:dataType:datahub.string",
+ "allowedValues": [
+ {
+ "value": {"string": "foo2"},
+ "description": "test foo2 value"
+ },
+ {
+ "value": {"string": "bar2"},
+ "description": "test bar2 value"
+ }
+ ],
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ]
+ }
}' | jq
```
@@ -686,24 +698,26 @@ Specifically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acr
```shell
curl -X 'POST' -v \
- 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \
+ 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties?createIfNotExists=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "properties": [
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
- "values": [
- {"double": 60.0}
- ]
- },
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02",
- "values": [
- {"string": "bar2"}
- ]
- }
- ]
+ "value": {
+ "properties": [
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "values": [
+ {"double": 60.0}
+ ]
+ },
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02",
+ "values": [
+ {"string": "bar2"}
+ ]
+ }
+ ]
+ }
}' | jq
```
@@ -1111,7 +1125,9 @@ curl -X 'POST' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
-"removed": true
+ "value": {
+ "removed": true
+ }
}' | jq
```
@@ -1132,11 +1148,13 @@ If you want to **remove the soft delete**, you can do so by either hard deleting
```shell
curl -X 'POST' \
- 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \
+ 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false&createIfNotExists=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
-"removed": false
+ "value": {
+    "removed": false
+ }
}' | jq
```
@@ -1271,34 +1289,42 @@ Change the cardinality to `SINGLE` and add a `version`.
```shell
curl -X 'POST' -v \
- 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \
+ 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition?createIfNotExists=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
+ "value": {
"qualifiedName": "io.acryl.privacy.retentionTime",
- "valueType": "urn:li:dataType:datahub.number",
- "description": "Retention Time is used to figure out how long to retain records in a dataset",
- "displayName": "Retention Time",
- "cardinality": "SINGLE",
- "version": "20240614080000",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset",
- "urn:li:entityType:datahub.dataFlow"
- ],
- "allowedValues": [
- {
- "value": {"double": 30},
- "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
- },
- {
- "value": {"double": 60},
- "description": "Use this for datasets that drive monthly reporting but contain pii"
- },
- {
- "value": {"double": 365},
- "description": "Use this for non-sensitive data that can be retained for longer"
- }
- ]
+ "valueType": "urn:li:dataType:datahub.number",
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "displayName": "Retention Time",
+ "cardinality": "SINGLE",
+ "version": "20240614080000",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "allowedValues": [
+ {
+ "value": {
+ "double": 30
+ },
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {
+ "double": 60
+ },
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {
+ "double": 365
+ },
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
+ }
}' | jq
```
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
index de5d2ae1118d4..f415a4f47c9dc 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
@@ -13,14 +13,11 @@
import com.datahub.authorization.AuthorizerChain;
import com.datahub.util.RecordUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
-import com.linkedin.data.ByteString;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
-import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.ChangeMCP;
@@ -41,7 +38,6 @@
import com.linkedin.metadata.search.SearchEntityArray;
import com.linkedin.metadata.search.SearchService;
import com.linkedin.metadata.utils.AuditStampUtils;
-import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.metadata.utils.SearchUtil;
import com.linkedin.mxe.SystemMetadata;
import com.linkedin.util.Pair;
@@ -57,7 +53,6 @@
import jakarta.servlet.http.HttpServletRequest;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
@@ -726,28 +721,14 @@ protected RecordTemplate toRecordTemplate(
aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data());
}
- protected ChangeMCP toUpsertItem(
+ protected abstract ChangeMCP toUpsertItem(
@Nonnull AspectRetriever aspectRetriever,
Urn entityUrn,
AspectSpec aspectSpec,
Boolean createIfNotExists,
String jsonAspect,
Actor actor)
- throws JsonProcessingException {
- JsonNode jsonNode = objectMapper.readTree(jsonAspect);
- String aspectJson = jsonNode.get("value").toString();
- return ChangeItemImpl.builder()
- .urn(entityUrn)
- .aspectName(aspectSpec.getName())
- .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
- .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
- .recordTemplate(
- GenericRecordUtils.deserializeAspect(
- ByteString.copyString(aspectJson, StandardCharsets.UTF_8),
- GenericRecordUtils.JSON,
- aspectSpec))
- .build(aspectRetriever);
- }
+ throws URISyntaxException, JsonProcessingException;
protected ChangeMCP toUpsertItem(
@Nonnull AspectRetriever aspectRetriever,
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
index 54a7724cadd34..1207eb331b795 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
@@ -13,8 +13,11 @@
import com.linkedin.data.ByteString;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.IngestResult;
import com.linkedin.metadata.entity.UpdateAspectResult;
@@ -260,4 +263,26 @@ protected List buildEntityList(
}
return responseList;
}
+
+ @Override
+ protected ChangeMCP toUpsertItem(
+ @Nonnull AspectRetriever aspectRetriever,
+ Urn entityUrn,
+ AspectSpec aspectSpec,
+ Boolean createIfNotExists,
+ String jsonAspect,
+ Actor actor)
+ throws URISyntaxException {
+ return ChangeItemImpl.builder()
+ .urn(entityUrn)
+ .aspectName(aspectSpec.getName())
+ .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
+ .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
+ .recordTemplate(
+ GenericRecordUtils.deserializeAspect(
+ ByteString.copyString(jsonAspect, StandardCharsets.UTF_8),
+ GenericRecordUtils.JSON,
+ aspectSpec))
+ .build(aspectRetriever);
+ }
}
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
index a0478c9af1609..fbc9bf2956cfd 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
@@ -14,8 +14,11 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.data.ByteString;
import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.IngestResult;
import com.linkedin.metadata.entity.UpdateAspectResult;
@@ -348,4 +351,28 @@ protected AspectsBatch toMCPBatch(
.retrieverContext(opContext.getRetrieverContext().get())
.build();
}
+
+ @Override
+ protected ChangeMCP toUpsertItem(
+ @Nonnull AspectRetriever aspectRetriever,
+ Urn entityUrn,
+ AspectSpec aspectSpec,
+ Boolean createIfNotExists,
+ String jsonAspect,
+ Actor actor)
+ throws JsonProcessingException {
+ JsonNode jsonNode = objectMapper.readTree(jsonAspect);
+ String aspectJson = jsonNode.get("value").toString();
+ return ChangeItemImpl.builder()
+ .urn(entityUrn)
+ .aspectName(aspectSpec.getName())
+ .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
+ .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
+ .recordTemplate(
+ GenericRecordUtils.deserializeAspect(
+ ByteString.copyString(aspectJson, StandardCharsets.UTF_8),
+ GenericRecordUtils.JSON,
+ aspectSpec))
+ .build(aspectRetriever);
+ }
}
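
The thrust of this patch: OpenAPI v3 request bodies wrap each aspect in a `"value"` envelope (unwrapped by the v3 controller's `toUpsertItem` above), while v2 accepts the bare aspect JSON. A small Jackson-based sketch of the difference; the demo class is illustrative and Jackson is assumed on the classpath:

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class EnvelopeDemo {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    String v3Body = "{\"value\":{\"removed\":true}}"; // v3: aspect wrapped in "value"
    String v2Body = "{\"removed\":true}";             // v2: bare aspect JSON

    // What the v3 controller extracts before deserializing the aspect:
    JsonNode v3Node = mapper.readTree(v3Body);
    String aspectJson = v3Node.get("value").toString();

    System.out.println(aspectJson.equals(v2Body)); // true: same underlying aspect
  }
}
```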
From 469654ced75c3340276028068a6ca201eadc0cdf Mon Sep 17 00:00:00 2001
From: David Leifker
Date: Fri, 9 Aug 2024 12:40:34 -0500
Subject: [PATCH 19/72] Revert "fix(openapi): fix openapi v2 endpoints & v3
documentation update"
This reverts commit 573c1cb8407c2a5d152e5abb6b7d9f012eea75cb.
---
docs/api/tutorials/structured-properties.md | 204 ++++++++----------
.../controller/GenericEntitiesController.java | 23 +-
.../v2/controller/EntityController.java | 25 ---
.../v3/controller/EntityController.java | 27 ---
4 files changed, 110 insertions(+), 169 deletions(-)
diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md
index 00e992f2bd0bb..6f6c6541554d9 100644
--- a/docs/api/tutorials/structured-properties.md
+++ b/docs/api/tutorials/structured-properties.md
@@ -158,37 +158,29 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
"qualifiedName": "io.acryl.privacy.retentionTime",
- "valueType": "urn:li:dataType:datahub.number",
- "description": "Retention Time is used to figure out how long to retain records in a dataset",
- "displayName": "Retention Time",
- "cardinality": "MULTIPLE",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset",
- "urn:li:entityType:datahub.dataFlow"
- ],
- "allowedValues": [
- {
- "value": {
- "double": 30
- },
- "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
- },
- {
- "value": {
- "double": 60
- },
- "description": "Use this for datasets that drive monthly reporting but contain pii"
- },
- {
- "value": {
- "double": 365
- },
- "description": "Use this for non-sensitive data that can be retained for longer"
- }
- ]
- }
+ "valueType": "urn:li:dataType:datahub.number",
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "displayName": "Retention Time",
+ "cardinality": "MULTIPLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "allowedValues": [
+ {
+ "value": {"double": 30},
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {"double": 60},
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {"double": 365},
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
}' | jq
```
@@ -482,16 +474,14 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
- "properties": [
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
- "values": [
- {"double": 60.0}
- ]
- }
- ]
- }
+ "properties": [
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "values": [
+ {"double": 60.0}
+ ]
+ }
+ ]
}' | jq
```
Example Response:
@@ -637,25 +627,23 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
- "qualifiedName": "io.acryl.privacy.retentionTime02",
- "displayName": "Retention Time 02",
- "valueType": "urn:li:dataType:datahub.string",
- "allowedValues": [
- {
- "value": {"string": "foo2"},
- "description": "test foo2 value"
- },
- {
- "value": {"string": "bar2"},
- "description": "test bar2 value"
- }
- ],
- "cardinality": "SINGLE",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset"
- ]
- }
+ "qualifiedName": "io.acryl.privacy.retentionTime02",
+ "displayName": "Retention Time 02",
+ "valueType": "urn:li:dataType:datahub.string",
+ "allowedValues": [
+ {
+ "value": {"string": "foo2"},
+ "description": "test foo2 value"
+ },
+ {
+ "value": {"string": "bar2"},
+ "description": "test bar2 value"
+ }
+ ],
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ]
}' | jq
```
@@ -698,26 +686,24 @@ Specifically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acr
```shell
curl -X 'POST' -v \
- 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties?createIfNotExists=false' \
+ 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
- "properties": [
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
- "values": [
- {"double": 60.0}
- ]
- },
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02",
- "values": [
- {"string": "bar2"}
- ]
- }
- ]
- }
+ "properties": [
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "values": [
+ {"double": 60.0}
+ ]
+ },
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02",
+ "values": [
+ {"string": "bar2"}
+ ]
+ }
+ ]
}' | jq
```
@@ -1125,9 +1111,7 @@ curl -X 'POST' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
- "removed": true
- }
+"removed": true
}' | jq
```
@@ -1148,13 +1132,11 @@ If you want to **remove the soft delete**, you can do so by either hard deleting
```shell
curl -X 'POST' \
- 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false&createIfNotExists=false' \
+ 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
-    "removed": false
- }
+"removed": false
}' | jq
```
@@ -1289,42 +1271,34 @@ Change the cardinality to `SINGLE` and add a `version`.
```shell
curl -X 'POST' -v \
- 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition?createIfNotExists=false' \
+ 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "value": {
"qualifiedName": "io.acryl.privacy.retentionTime",
- "valueType": "urn:li:dataType:datahub.number",
- "description": "Retention Time is used to figure out how long to retain records in a dataset",
- "displayName": "Retention Time",
- "cardinality": "SINGLE",
- "version": "20240614080000",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset",
- "urn:li:entityType:datahub.dataFlow"
- ],
- "allowedValues": [
- {
- "value": {
- "double": 30
- },
- "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
- },
- {
- "value": {
- "double": 60
- },
- "description": "Use this for datasets that drive monthly reporting but contain pii"
- },
- {
- "value": {
- "double": 365
- },
- "description": "Use this for non-sensitive data that can be retained for longer"
- }
- ]
- }
+ "valueType": "urn:li:dataType:datahub.number",
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "displayName": "Retention Time",
+ "cardinality": "SINGLE",
+ "version": "20240614080000",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "allowedValues": [
+ {
+ "value": {"double": 30},
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {"double": 60},
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {"double": 365},
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
}' | jq
```
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
index f415a4f47c9dc..de5d2ae1118d4 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
@@ -13,11 +13,14 @@
import com.datahub.authorization.AuthorizerChain;
import com.datahub.util.RecordUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
+import com.linkedin.data.ByteString;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.ChangeMCP;
@@ -38,6 +41,7 @@
import com.linkedin.metadata.search.SearchEntityArray;
import com.linkedin.metadata.search.SearchService;
import com.linkedin.metadata.utils.AuditStampUtils;
+import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.metadata.utils.SearchUtil;
import com.linkedin.mxe.SystemMetadata;
import com.linkedin.util.Pair;
@@ -53,6 +57,7 @@
import jakarta.servlet.http.HttpServletRequest;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
@@ -721,14 +726,28 @@ protected RecordTemplate toRecordTemplate(
aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data());
}
- protected abstract ChangeMCP toUpsertItem(
+ protected ChangeMCP toUpsertItem(
@Nonnull AspectRetriever aspectRetriever,
Urn entityUrn,
AspectSpec aspectSpec,
Boolean createIfNotExists,
String jsonAspect,
Actor actor)
- throws URISyntaxException, JsonProcessingException;
+ throws JsonProcessingException {
+ JsonNode jsonNode = objectMapper.readTree(jsonAspect);
+ String aspectJson = jsonNode.get("value").toString();
+ return ChangeItemImpl.builder()
+ .urn(entityUrn)
+ .aspectName(aspectSpec.getName())
+ .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
+ .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
+ .recordTemplate(
+ GenericRecordUtils.deserializeAspect(
+ ByteString.copyString(aspectJson, StandardCharsets.UTF_8),
+ GenericRecordUtils.JSON,
+ aspectSpec))
+ .build(aspectRetriever);
+ }
protected ChangeMCP toUpsertItem(
@Nonnull AspectRetriever aspectRetriever,
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
index 1207eb331b795..54a7724cadd34 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
@@ -13,11 +13,8 @@
import com.linkedin.data.ByteString;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
-import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
-import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.IngestResult;
import com.linkedin.metadata.entity.UpdateAspectResult;
@@ -263,26 +260,4 @@ protected List buildEntityList(
}
return responseList;
}
-
- @Override
- protected ChangeMCP toUpsertItem(
- @Nonnull AspectRetriever aspectRetriever,
- Urn entityUrn,
- AspectSpec aspectSpec,
- Boolean createIfNotExists,
- String jsonAspect,
- Actor actor)
- throws URISyntaxException {
- return ChangeItemImpl.builder()
- .urn(entityUrn)
- .aspectName(aspectSpec.getName())
- .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
- .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
- .recordTemplate(
- GenericRecordUtils.deserializeAspect(
- ByteString.copyString(jsonAspect, StandardCharsets.UTF_8),
- GenericRecordUtils.JSON,
- aspectSpec))
- .build(aspectRetriever);
- }
}
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
index fbc9bf2956cfd..a0478c9af1609 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
@@ -14,11 +14,8 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.data.ByteString;
import com.linkedin.entity.EnvelopedAspect;
-import com.linkedin.events.metadata.ChangeType;
-import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
-import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.IngestResult;
import com.linkedin.metadata.entity.UpdateAspectResult;
@@ -351,28 +348,4 @@ protected AspectsBatch toMCPBatch(
.retrieverContext(opContext.getRetrieverContext().get())
.build();
}
-
- @Override
- protected ChangeMCP toUpsertItem(
- @Nonnull AspectRetriever aspectRetriever,
- Urn entityUrn,
- AspectSpec aspectSpec,
- Boolean createIfNotExists,
- String jsonAspect,
- Actor actor)
- throws JsonProcessingException {
- JsonNode jsonNode = objectMapper.readTree(jsonAspect);
- String aspectJson = jsonNode.get("value").toString();
- return ChangeItemImpl.builder()
- .urn(entityUrn)
- .aspectName(aspectSpec.getName())
- .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
- .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
- .recordTemplate(
- GenericRecordUtils.deserializeAspect(
- ByteString.copyString(aspectJson, StandardCharsets.UTF_8),
- GenericRecordUtils.JSON,
- aspectSpec))
- .build(aspectRetriever);
- }
}
From 3dfbbd5094803e95bd3dbae703e3a57dc9cdd99b Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:19:36 -0500
Subject: [PATCH 20/72] docs(policies): updates to policies documentation
(#11073)
---
docs/authorization/access-policies-guide.md | 26 +-
docs/authorization/policies.md | 359 ++++++++++++--------
2 files changed, 220 insertions(+), 165 deletions(-)
diff --git a/docs/authorization/access-policies-guide.md b/docs/authorization/access-policies-guide.md
index 2040d7ff79e99..0f741a95282bd 100644
--- a/docs/authorization/access-policies-guide.md
+++ b/docs/authorization/access-policies-guide.md
@@ -15,7 +15,9 @@ There are 2 types of Access Policy within DataHub:
-**Platform** Policies determine who has platform-level Privileges on DataHub. These include:
+## Platform
+
+Policies determine who has platform-level Privileges on DataHub. These include:
- Managing Users & Groups
- Viewing the DataHub Analytics Page
@@ -31,7 +33,9 @@ A few Platform Policies in plain English include:
- The Data Platform team should be allowed to manage users & groups, view platform analytics, & manage policies themselves
- John from IT should be able to invite new users
-**Metadata** policies determine who can do what to which Metadata Entities. For example:
+## Metadata
+
+Metadata policies determine who can do what to which Metadata Entities. For example:
- Who can edit Dataset Documentation & Links?
- Who can add Owners to a Chart?
@@ -51,17 +55,14 @@ A few **Metadata** Policies in plain English include:
Each of these can be implemented by constructing DataHub Access Policies.
-## Access Policies Setup, Prerequisites, and Permissions
-
-What you need to manage Access Policies on DataHub:
+## Using Access Policies
+:::note Required Access
* **Manage Policies** Privilege
This Platform Privilege allows users to create, edit, and remove all Access Policies on DataHub. Therefore, it should only be
given to those users who will be serving as Admins of the platform. The default `Admin` role has this Privilege.
-
-
-## Using Access Policies
+:::
Policies can be created by first navigating to **Settings > Permissions > Policies**.
@@ -270,10 +271,5 @@ Policies only affect REST APIs when the environment variable `REST_API_AUTHORIZA
Policies are the lowest level primitive for granting Privileges to users on DataHub.
Roles are built for convenience on top of Policies. Roles grant Privileges to actors indirectly, driven by Policies
-behind the scenes. Both can be used in conjunction to grant Privileges to end users.
-
-
-
-### Related Features
-
-- [Roles](./roles.md)
\ No newline at end of file
+behind the scenes. Both can be used in conjunction to grant Privileges to end users. For more information on roles,
+please refer to [Authorization > Roles](./roles.md).
diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md
index 91b0241c7d514..b393c8ffa3757 100644
--- a/docs/authorization/policies.md
+++ b/docs/authorization/policies.md
@@ -49,14 +49,23 @@ and so on.
A Metadata Policy can be broken down into 3 parts:
-1. **Actors**: The 'who'. Specific users, groups that the policy applies to.
+1. **Resources**: The 'which'. Resources that the policy applies to, e.g. "All Datasets".
2. **Privileges**: The 'what'. What actions are being permitted by a policy, e.g. "Add Tags".
-3. **Resources**: The 'which'. Resources that the policy applies to, e.g. "All Datasets".
+3. **Actors**: The 'who'. Specific users, groups that the policy applies to.
-#### Actors
+#### Resources
+
+Resources can be associated with the policy in a number of ways.
-We currently support 3 ways to define the set of actors the policy applies to: a) list of users b) list of groups, and
-c) owners of the entity. You also have the option to apply the policy to all users or groups.
+1. List of resource types - The entity's type, for example: dataset, chart, dashboard
+2. List of resource URNs
+3. List of tags
+4. List of domains
+
+:::note Important Note
+The associations in the list above are combined as an *intersection*, i.e. an _AND_ operation. For example, if the policy targets
+`1. resource type: dataset` and `3. resources tagged: 'myTag'`, it will apply only to datasets that are tagged with 'myTag'.
+:::
#### Privileges
@@ -64,55 +73,162 @@ Check out the list of
privileges [here](https://github.com/datahub-project/datahub/blob/master/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java)
. Note, the privileges are semantic by nature, and do not tie in 1-to-1 with the aspect model.
-All edits on the UI are covered by a privilege, to make sure we have the ability to restrict write access.
+All edits on the UI are covered by a privilege, to make sure we have the ability to restrict write access. See the
+[Reference](#Reference) section below.
+
+#### Actors
+
+We currently support 3 ways to define the set of actors the policy applies to:
+
+1. list of users (or all users)
+2. list of groups (or all groups)
+3. owners of the entity
+
+:::note Important Note
+Unlike resources, the definitions for actors are a union of the actors. For example, if user `1. Alice` is associated
+with the policy as well as `3. owners of the entity`, then Alice _OR_ any owner of
+the targeted resource(s) will be included in the policy.
+:::
+
+## Managing Policies
+
+Policies can be managed on the **Settings > Permissions > Policies** page. The `Policies` tab will only
+be visible to those users having the `Manage Policies` privilege.
-We currently support the following:
+Out of the box, DataHub is deployed with a set of pre-baked Policies. The set of default policies is created at deploy
+time and can be found inside the `policies.json` file within `metadata-service/war/src/main/resources/boot`. This set of policies serves the
+following purposes:
+
+1. Assigns immutable super-user privileges for the root `datahub` user account (Immutable)
+2. Assigns all Platform privileges for all Users by default (Editable)
+
+The reason for #1 is to prevent people from accidentally deleting all policies and getting locked out (the `datahub` super user account can be a backup).
+The reason for #2 is to permit administrators to log in via OIDC or another means outside of the `datahub` root account
+when they are bootstrapping with DataHub. This way, those setting up DataHub can start managing policies without friction.
+Note that these privileges *can* and likely *should* be altered inside the **Policies** page of the UI.
+
+:::note Pro-Tip
+To log in using the `datahub` account, simply navigate to `/login` and enter `datahub`, `datahub`. Note that the password can be customized for your
+deployment by changing the `user.props` file within the `datahub-frontend` module. Notice that JaaS authentication must be enabled.
+:::
+
+## Configuration
+
+By default, the Policies feature is *enabled*. This means that the deployment will support creating, editing, removing, and
+most importantly enforcing fine-grained access policies.
+
+In some cases, these capabilities are not desirable. For example, if your company's users are already used to having free rein, you
+may want to keep it that way. Or perhaps it is only your Data Platform team who actively uses DataHub, in which case Policies may be overkill.
+
+For these scenarios, we've provided a back door to disable Policies in your deployment of DataHub. This will completely hide
+the policies management UI and by default will allow all actions on the platform. It will be as though
+each user has *all* privileges, both of the **Platform** & **Metadata** flavor.
+
+To disable Policies, you can simply set the `AUTH_POLICIES_ENABLED` environment variable for the `datahub-gms` service container
+to `false`. For example in your `docker/datahub-gms/docker.env`, you'd place
+
+```
+AUTH_POLICIES_ENABLED=false
+```
+
+### REST API Authorization
+
+Policies only affect REST APIs when the environment variable `REST_API_AUTHORIZATION` is set to `true` for GMS. Some policies apply only when this setting is enabled (marked above); other Metadata and Platform policies apply to the relevant APIs, as also noted in the tables above.
+
+## Reference
+
+For a complete list of privileges, see
+[PoliciesConfig.java](https://github.com/datahub-project/datahub/blob/master/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java).
+
+### Platform-level privileges
-##### Platform-level privileges
These privileges are for DataHub operators to access & manage the administrative functionality of the system.
-| Platform Privileges | Description |
-|-----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. |
-| Manage Domains | Allow actor to create and remove Asset Domains. |
-| Manage Home Page Posts | Allow actor to create and delete home page posts |
-| Manage Glossaries | Allow actor to create, edit, and remove Glossary Entities |
-| Manage Tags | Allow actor to create and remove Tags. |
-| Manage Business Attribute | Allow actor to create, update, delete Business Attribute |
-| Manage Documentation Forms | Allow actor to manage forms assigned to assets to assist in documentation efforts. |
-| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. |
-| Manage Metadata Ingestion | Allow actor to create, remove, and update Metadata Ingestion sources. |
-| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. |
-| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. |
-| View Analytics | Allow actor to view the DataHub analytics dashboard. |
-| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. |
-| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords |
-| Manage Public Views | Allow actor to create, update, and delete any Public (shared) Views. |
-| Manage Ownership Types | Allow actor to create, update and delete Ownership Types. |
-| Create Business Attribute | Allow actor to create new Business Attribute. |
-| Manage Connections | Allow actor to manage connections to external DataHub platforms. |
-| Restore Indices API[^1] | Allow actor to use the Restore Indices API. |
-| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. |
-| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. |
-| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. |
-| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. |
-| Apply Retention API[^1] | Allow actor to apply retention using the API. |
-| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. |
-| Manage Tests[^2] | Allow actor to create and remove Asset Tests. |
-| View Metadata Proposals[^2] | Allow actor to view the requests tab for viewing metadata proposals. |
-| Create metadata constraints[^2] | Allow actor to create metadata constraints. |
-| Manage Platform Settings[^2] | Allow actor to view and change platform-level settings, like integrations & notifications. |
-| Manage Monitors[^2] | Allow actor to create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. |
+#### Access & Credentials
+
+| Platform Privileges | Description |
+|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. |
+| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. |
+| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. |
+| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. |
+| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. |
+| Manage User Credentials              | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords |
+| Manage Connections | Allow actor to manage connections to external DataHub platforms. |
+
+#### Product Features
+
+| Platform Privileges | Description |
+|-------------------------------------|--------------------------------------------------------------------------------------------------------------------|
+| Manage Home Page Posts | Allow actor to create and delete home page posts |
+| Manage Business Attribute | Allow actor to create, update, delete Business Attribute |
+| Manage Documentation Forms | Allow actor to manage forms assigned to assets to assist in documentation efforts. |
+| Manage Metadata Ingestion | Allow actor to create, remove, and update Metadata Ingestion sources. |
+| Manage Features | Umbrella privilege to manage all features. |
+| View Analytics | Allow actor to view the DataHub analytics dashboard. |
+| Manage Public Views | Allow actor to create, update, and delete any Public (shared) Views. |
+| Manage Ownership Types | Allow actor to create, update and delete Ownership Types. |
+| Create Business Attribute | Allow actor to create new Business Attribute. |
+| Manage Structured Properties | Manage structured properties in your instance. |
+| View Tests | View Asset Tests. |
+| Manage Tests[^2] | Allow actor to create and remove Asset Tests. |
+| View Metadata Proposals[^2] | Allow actor to view the requests tab for viewing metadata proposals. |
+| Create metadata constraints[^2] | Allow actor to create metadata constraints. |
+| Manage Platform Settings[^2] | Allow actor to view and change platform-level settings, like integrations & notifications. |
+| Manage Monitors[^2] | Allow actor to create, update, and delete any data asset monitors, including Custom SQL monitors. Grant with care. |
[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
[^2]: DataHub Cloud only
-##### Common metadata privileges
+#### Entity Management
+
+| Platform Privileges | Description |
+|-------------------------------------|------------------------------------------------------------------------------------|
+| Manage Domains | Allow actor to create and remove Asset Domains. |
+| Manage Glossaries | Allow actor to create, edit, and remove Glossary Entities |
+| Manage Tags | Allow actor to create and remove Tags. |
+
+#### System Management
+
+| Platform Privileges | Description |
+|-----------------------------------------------|--------------------------------------------------------------------------|
+| Restore Indices API[^1]                       | Allow actor to use the Restore Indices API.                                |
+| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. |
+| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. |
+| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. |
+| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. |
+| Apply Retention API[^1] | Allow actor to apply retention using the API. |
+| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. |
+
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
+
+### Common Metadata Privileges
+
These privileges are to view & modify any entity within DataHub.
-| Common Privileges | Description |
+#### Entity Privileges
+
+| Entity Privileges | Description |
|-------------------------------------|--------------------------------------------------------------------------------------------|
| View Entity Page | Allow actor to view the entity page. |
+| Edit Entity | Allow actor to edit any information about an entity. Super user privileges for the entity. |
+| Delete | Allow actor to delete this entity. |
+| Create Entity | Allow actor to create an entity if it doesn't exist. |
+| Entity Exists | Allow actor to determine whether the entity exists. |
+| Get Timeline API[^1] | Allow actor to use the GET Timeline API. |
+| Get Entity + Relationships API[^1] | Allow actor to use the GET Entity and Relationships API. |
+| Get Aspect/Entity Count APIs[^1] | Allow actor to use the GET Aspect/Entity Count APIs. |
+| View Entity[^2] | Allow actor to view the entity in search results. |
+| Share Entity[^2] | Allow actor to share an entity with another DataHub Cloud instance. |
+
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
+
+#### Aspect Privileges
+
+| Aspect Privileges | Description |
+|-------------------------------------|--------------------------------------------------------------------------------------------|
| Edit Tags | Allow actor to add and remove tags to an asset. |
| Edit Glossary Terms | Allow actor to add and remove glossary terms to an asset. |
| Edit Description | Allow actor to edit the description (documentation) of an entity. |
@@ -122,35 +238,57 @@ These privileges are to view & modify any entity within DataHub.
| Edit Data Product | Allow actor to edit the Data Product of an entity. |
| Edit Deprecation | Allow actor to edit the Deprecation status of an entity. |
| Edit Incidents | Allow actor to create and remove incidents for an entity. |
-| Edit Entity | Allow actor to edit any information about an entity. Super user privileges for the entity. |
| Edit Lineage | Allow actor to add and remove lineage edges for this entity. |
| Edit Properties | Allow actor to edit the properties for an entity. |
| Edit Owners | Allow actor to add and remove owners of an entity. |
-| Delete | Allow actor to delete this entity. |
-| Search API[^1] | Allow actor to access search APIs. |
-| Get Aspect/Entity Count APIs[^1] | Allow actor to use the GET Aspect/Entity Count APIs. |
| Get Timeseries Aspect API[^1] | Allow actor to use the GET Timeseries Aspect API. |
-| Get Entity + Relationships API[^1] | Allow actor to use the GET Entity and Relationships API. |
-| Get Timeline API[^1] | Allow actor to use the GET Timeline API. |
+
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
+
+#### Proposals
+
+| Proposals Privileges | Description |
+|------------------------------------|--------------------------------------------------------------------------------------------|
+| Propose Tags[^2] | Allow actor to propose adding a tag to an asset. |
+| Propose Glossary Terms[^2] | Allow actor to propose adding a glossary term to an asset. |
+| Propose Documentation[^2] | Allow actor to propose updates to an asset's documentation. |
+| Manage Tag Proposals[^2] | Allow actor to manage a proposal to add a tag to an asset. |
+| Manage Glossary Term Proposals[^2] | Allow actor to manage a proposal to add a glossary term to an asset. |
+| Manage Documentation Proposals[^2] | Allow actor to manage a proposal to update an asset's documentation. |
+
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
+
+#### System Management
+
+| System Privileges | Description |
+|-------------------------------------|--------------------------------------------------------------------------------------------|
| Explain ElasticSearch Query API[^1] | Allow actor to use the Operations API explain endpoint. |
| Produce Platform Event API[^1] | Allow actor to produce Platform Events using the API. |
-| Create Entity | Allow actor to create an entity if it doesn't exist. |
-| Entity Exists | Allow actor to determine whether the entity exists. |
-| View Entity[^2] | Allow actor to view the entity in search results. |
-| Propose Tags[^2] | Allow actor to propose adding a tag to an asset. |
-| Propose Glossary Terms[^2] | Allow actor to propose adding a glossary term to an asset. |
-| Propose Documentation[^2] | Allow actor to propose updates to an asset's documentation. |
-| Manage Tag Proposals[^2] | Allow actor to manage a proposal to add a tag to an asset. |
-| Manage Glossary Term Proposals[^2] | Allow actor to manage a proposal to add a glossary term to an asset. |
-| Manage Documentation Proposals[^2] | Allow actor to manage a proposal update an asset's documentation |
-| Share Entity[^2] | Allow actor to share an entity with another DataHub Cloud instance. |
[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
[^2]: DataHub Cloud only
-##### Specific entity-level privileges
+### Specific Entity-level Privileges
These privileges are not generalizable.
+#### Users & Groups
+
+| Entity | Privilege | Description |
+|--------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Group | Edit Group Members | Allow actor to add and remove members to a group. |
+| Group | Manage Group Notification Settings[^2] | Allow actor to manage notification settings for a group. |
+| Group | Manage Group Subscriptions[^2] | Allow actor to manage subscriptions for a group. |
+| Group | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. |
+| User | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. |
+| User | Edit User Profile | Allow actor to change the user's profile including display name, bio, title, profile image, etc. |
+
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
+
+#### Dataset
+
| Entity | Privilege | Description |
|--------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Dataset | View Dataset Usage | Allow actor to access dataset usage information (includes usage statistics and queries). |
@@ -174,101 +312,22 @@ These privileges are not generalizable.
| Domain | Manage Data Products | Allow actor to create, edit, and delete Data Products within a Domain |
| GlossaryNode | Manage Direct Glossary Children | Allow actor to create and delete the direct children of this entity. |
| GlossaryNode | Manage All Glossary Children | Allow actor to create and delete everything underneath this entity. |
-| Group | Edit Group Members | Allow actor to add and remove members to a group. |
-| Group | Manage Group Notification Settings[^2] | Allow actor to manage notification settings for a group. |
-| Group | Manage Group Subscriptions[^2] | Allow actor to manage subscriptions for a group. |
-| Group | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. |
-| User | Edit Contact Information | Allow actor to change the contact information such as email & chat handles. |
-| User | Edit User Profile | Allow actor to change the user's profile including display name, bio, title, profile image, etc. |
-
-#### Resources
-
-Resource filter defines the set of resources that the policy applies to is defined using a list of criteria. Each
-criterion defines a field type (like type, urn, domain), a list of field values to compare, and a
-condition (like EQUALS). It essentially checks whether the field of a certain resource matches any of the input values.
-Note, that if there are no criteria or resource is not set, policy is applied to ALL resources.
-
-For example, the following resource filter will apply the policy to datasets, charts, and dashboards under domain 1.
-
-```json
-{
- "resources": {
- "filter": {
- "criteria": [
- {
- "field": "TYPE",
- "condition": "EQUALS",
- "values": [
- "dataset",
- "chart",
- "dashboard"
- ]
- },
- {
- "field": "DOMAIN",
- "values": [
- "urn:li:domain:domain1"
- ],
- "condition": "EQUALS"
- }
- ]
- }
- }
-}
-```
-Where `resources` is inside the `info` aspect of a Policy.
-
-Supported fields are as follows
-
-| Field Type | Description | Example |
-|---------------|------------------------|-------------------------|
-| type | Type of the resource | dataset, chart, dataJob |
-| urn | Urn of the resource | urn:li:dataset:... |
-| domain | Domain of the resource | urn:li:domain:domainX |
-
-## Managing Policies
-
-Policies can be managed on the page **Settings > Permissions > Policies** page. The `Policies` tab will only
-be visible to those users having the `Manage Policies` privilege.
-
-Out of the box, DataHub is deployed with a set of pre-baked Policies. The set of default policies are created at deploy
-time and can be found inside the `policies.json` file within `metadata-service/war/src/main/resources/boot`. This set of policies serves the
-following purposes:
-
-1. Assigns immutable super-user privileges for the root `datahub` user account (Immutable)
-2. Assigns all Platform privileges for all Users by default (Editable)
-
-The reason for #1 is to prevent people from accidentally deleting all policies and getting locked out (`datahub` super user account can be a backup)
-The reason for #2 is to permit administrators to log in via OIDC or another means outside of the `datahub` root account
-when they are bootstrapping with DataHub. This way, those setting up DataHub can start managing policies without friction.
-Note that these privilege *can* and likely *should* be altered inside the **Policies** page of the UI.
-
-> Pro-Tip: To login using the `datahub` account, simply navigate to `/login` and enter `datahub`, `datahub`. Note that the password can be customized for your
-deployment by changing the `user.props` file within the `datahub-frontend` module. Notice that JaaS authentication must be enabled.
-
-## Configuration
-
-By default, the Policies feature is *enabled*. This means that the deployment will support creating, editing, removing, and
-most importantly enforcing fine-grained access policies.
-
-In some cases, these capabilities are not desirable. For example, if your company's users are already used to having free reign, you
-may want to keep it that way. Or perhaps it is only your Data Platform team who actively uses DataHub, in which case Policies may be overkill.
-For these scenarios, we've provided a back door to disable Policies in your deployment of DataHub. This will completely hide
-the policies management UI and by default will allow all actions on the platform. It will be as though
-each user has *all* privileges, both of the **Platform** & **Metadata** flavor.
-To disable Policies, you can simply set the `AUTH_POLICIES_ENABLED` environment variable for the `datahub-gms` service container
-to `false`. For example in your `docker/datahub-gms/docker.env`, you'd place
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
-```
-AUTH_POLICIES_ENABLED=false
-```
+#### Misc
-### REST API Authorization
-
-Policies only affect REST APIs when the environment variable `REST_API_AUTHORIZATION` is set to `true` for GMS. Some policies only apply when this setting is enabled, marked above, and other Metadata and Platform policies apply to the APIs where relevant, also specified in the table above.
+| Entity | Privilege | Description |
+|--------------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Tag | Edit Tag Color | Allow actor to change the color of a Tag. |
+| Domain       | Manage Data Products                      | Allow actor to create, edit, and delete Data Products within a Domain.                                                                                                              |
+| GlossaryNode | Manage Direct Glossary Children | Allow actor to create and delete the direct children of this entity. |
+| GlossaryNode | Manage All Glossary Children | Allow actor to create and delete everything underneath this entity. |
+[^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true
+[^2]: DataHub Cloud only
## Coming Soon
@@ -278,7 +337,7 @@ The DataHub team is hard at work trying to improve the Policies feature. We are
Under consideration
-- Ability to define Metadata Policies against multiple reosurces scoped to particular "Containers" (e.g. A "schema", "database", or "collection")
+- Ability to define Metadata Policies against multiple resources scoped to particular "Containers" (e.g. A "schema", "database", or "collection")
## Feedback / Questions / Concerns
From 479f31d0f2368ab7376c419f4b365239da353d98 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:42:59 -0500
Subject: [PATCH 21/72] fix(openapi): fix openapi v2 and v3 docs update
(#11139)
---
docs/api/tutorials/structured-properties.md | 204 ++++++++++--------
.../controller/GenericEntitiesController.java | 23 +-
.../v2/controller/EntityController.java | 25 +++
.../v3/controller/EntityController.java | 27 +++
4 files changed, 169 insertions(+), 110 deletions(-)
diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md
index 6f6c6541554d9..00e992f2bd0bb 100644
--- a/docs/api/tutorials/structured-properties.md
+++ b/docs/api/tutorials/structured-properties.md
@@ -158,29 +158,37 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
+ "value": {
"qualifiedName": "io.acryl.privacy.retentionTime",
- "valueType": "urn:li:dataType:datahub.number",
- "description": "Retention Time is used to figure out how long to retain records in a dataset",
- "displayName": "Retention Time",
- "cardinality": "MULTIPLE",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset",
- "urn:li:entityType:datahub.dataFlow"
- ],
- "allowedValues": [
- {
- "value": {"double": 30},
- "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
- },
- {
- "value": {"double": 60},
- "description": "Use this for datasets that drive monthly reporting but contain pii"
- },
- {
- "value": {"double": 365},
- "description": "Use this for non-sensitive data that can be retained for longer"
- }
- ]
+ "valueType": "urn:li:dataType:datahub.number",
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "displayName": "Retention Time",
+ "cardinality": "MULTIPLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "allowedValues": [
+ {
+ "value": {
+ "double": 30
+ },
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {
+ "double": 60
+ },
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {
+ "double": 365
+ },
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
+ }
}' | jq
```
@@ -474,14 +482,16 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "properties": [
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
- "values": [
- {"double": 60.0}
- ]
- }
- ]
+ "value": {
+ "properties": [
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "values": [
+ {"double": 60.0}
+ ]
+ }
+ ]
+ }
}' | jq
```
Example Response:
@@ -627,23 +637,25 @@ curl -X 'POST' -v \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "qualifiedName": "io.acryl.privacy.retentionTime02",
- "displayName": "Retention Time 02",
- "valueType": "urn:li:dataType:datahub.string",
- "allowedValues": [
- {
- "value": {"string": "foo2"},
- "description": "test foo2 value"
- },
- {
- "value": {"string": "bar2"},
- "description": "test bar2 value"
- }
- ],
- "cardinality": "SINGLE",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset"
- ]
+ "value": {
+ "qualifiedName": "io.acryl.privacy.retentionTime02",
+ "displayName": "Retention Time 02",
+ "valueType": "urn:li:dataType:datahub.string",
+ "allowedValues": [
+ {
+ "value": {"string": "foo2"},
+ "description": "test foo2 value"
+ },
+ {
+ "value": {"string": "bar2"},
+ "description": "test bar2 value"
+ }
+ ],
+ "cardinality": "SINGLE",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset"
+ ]
+ }
}' | jq
```
@@ -686,24 +698,26 @@ Specically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acr
```shell
curl -X 'POST' -v \
- 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \
+ 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties?createIfNotExists=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
- "properties": [
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
- "values": [
- {"double": 60.0}
- ]
- },
- {
- "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02",
- "values": [
- {"string": "bar2"}
- ]
- }
- ]
+ "value": {
+ "properties": [
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime",
+ "values": [
+ {"double": 60.0}
+ ]
+ },
+ {
+ "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02",
+ "values": [
+ {"string": "bar2"}
+ ]
+ }
+ ]
+ }
}' | jq
```
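+
+To verify the write, the aspect can be read back with a GET against the same path (a sketch, assuming the v3 GET endpoint mirrors the POST path used above):
+
+```shell
+curl -X 'GET' \
+  'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \
+  -H 'accept: application/json' | jq
+```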
@@ -1111,7 +1125,9 @@ curl -X 'POST' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
-"removed": true
+ "value": {
+ "removed": true
+ }
}' | jq
```
@@ -1132,11 +1148,13 @@ If you want to **remove the soft delete**, you can do so by either hard deleting
```shell
curl -X 'POST' \
- 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \
+ 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false&createIfNotExists=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
-"removed": false
+ "value": {
+ "removed": true
+ }
}' | jq
```
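+
+Note that `createIfNotExists=false` turns this write into an upsert, so it succeeds even though the `status` aspect already exists; with `true`, the server would attempt a CREATE on an existing aspect (behavior inferred from the `toUpsertItem` changes below, which map the flag to `ChangeType.CREATE` versus `ChangeType.UPSERT`).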
@@ -1271,34 +1289,42 @@ Change the cardinality to `SINGLE` and add a `version`.
```shell
curl -X 'POST' -v \
- 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \
+ 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition?createIfNotExists=false' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
+ "value": {
"qualifiedName": "io.acryl.privacy.retentionTime",
- "valueType": "urn:li:dataType:datahub.number",
- "description": "Retention Time is used to figure out how long to retain records in a dataset",
- "displayName": "Retention Time",
- "cardinality": "SINGLE",
- "version": "20240614080000",
- "entityTypes": [
- "urn:li:entityType:datahub.dataset",
- "urn:li:entityType:datahub.dataFlow"
- ],
- "allowedValues": [
- {
- "value": {"double": 30},
- "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
- },
- {
- "value": {"double": 60},
- "description": "Use this for datasets that drive monthly reporting but contain pii"
- },
- {
- "value": {"double": 365},
- "description": "Use this for non-sensitive data that can be retained for longer"
- }
- ]
+ "valueType": "urn:li:dataType:datahub.number",
+ "description": "Retention Time is used to figure out how long to retain records in a dataset",
+ "displayName": "Retention Time",
+ "cardinality": "SINGLE",
+ "version": "20240614080000",
+ "entityTypes": [
+ "urn:li:entityType:datahub.dataset",
+ "urn:li:entityType:datahub.dataFlow"
+ ],
+ "allowedValues": [
+ {
+ "value": {
+ "double": 30
+ },
+ "description": "30 days, usually reserved for datasets that are ephemeral and contain pii"
+ },
+ {
+ "value": {
+ "double": 60
+ },
+ "description": "Use this for datasets that drive monthly reporting but contain pii"
+ },
+ {
+ "value": {
+ "double": 365
+ },
+ "description": "Use this for non-sensitive data that can be retained for longer"
+ }
+ ]
+ }
}' | jq
```
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
index de5d2ae1118d4..f415a4f47c9dc 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java
@@ -13,14 +13,11 @@
import com.datahub.authorization.AuthorizerChain;
import com.datahub.util.RecordUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
-import com.linkedin.data.ByteString;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
-import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.ChangeMCP;
@@ -41,7 +38,6 @@
import com.linkedin.metadata.search.SearchEntityArray;
import com.linkedin.metadata.search.SearchService;
import com.linkedin.metadata.utils.AuditStampUtils;
-import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.metadata.utils.SearchUtil;
import com.linkedin.mxe.SystemMetadata;
import com.linkedin.util.Pair;
@@ -57,7 +53,6 @@
import jakarta.servlet.http.HttpServletRequest;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
@@ -726,28 +721,14 @@ protected RecordTemplate toRecordTemplate(
aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data());
}
- protected ChangeMCP toUpsertItem(
+ protected abstract ChangeMCP toUpsertItem(
@Nonnull AspectRetriever aspectRetriever,
Urn entityUrn,
AspectSpec aspectSpec,
Boolean createIfNotExists,
String jsonAspect,
Actor actor)
- throws JsonProcessingException {
- JsonNode jsonNode = objectMapper.readTree(jsonAspect);
- String aspectJson = jsonNode.get("value").toString();
- return ChangeItemImpl.builder()
- .urn(entityUrn)
- .aspectName(aspectSpec.getName())
- .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
- .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
- .recordTemplate(
- GenericRecordUtils.deserializeAspect(
- ByteString.copyString(aspectJson, StandardCharsets.UTF_8),
- GenericRecordUtils.JSON,
- aspectSpec))
- .build(aspectRetriever);
- }
+ throws URISyntaxException, JsonProcessingException;
protected ChangeMCP toUpsertItem(
@Nonnull AspectRetriever aspectRetriever,
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
index 54a7724cadd34..1207eb331b795 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java
@@ -13,8 +13,11 @@
import com.linkedin.data.ByteString;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.IngestResult;
import com.linkedin.metadata.entity.UpdateAspectResult;
@@ -260,4 +263,26 @@ protected List buildEntityList(
}
return responseList;
}
+
+ @Override
+ protected ChangeMCP toUpsertItem(
+ @Nonnull AspectRetriever aspectRetriever,
+ Urn entityUrn,
+ AspectSpec aspectSpec,
+ Boolean createIfNotExists,
+ String jsonAspect,
+ Actor actor)
+ throws URISyntaxException {
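+    // openapi v2 accepts the raw aspect JSON without a "value" wrapper, so it is deserialized as-is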
+ return ChangeItemImpl.builder()
+ .urn(entityUrn)
+ .aspectName(aspectSpec.getName())
+ .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
+ .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
+ .recordTemplate(
+ GenericRecordUtils.deserializeAspect(
+ ByteString.copyString(jsonAspect, StandardCharsets.UTF_8),
+ GenericRecordUtils.JSON,
+ aspectSpec))
+ .build(aspectRetriever);
+ }
}
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
index a0478c9af1609..fbc9bf2956cfd 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java
@@ -14,8 +14,11 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.data.ByteString;
import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.batch.AspectsBatch;
import com.linkedin.metadata.aspect.batch.BatchItem;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.IngestResult;
import com.linkedin.metadata.entity.UpdateAspectResult;
@@ -348,4 +351,28 @@ protected AspectsBatch toMCPBatch(
.retrieverContext(opContext.getRetrieverContext().get())
.build();
}
+
+ @Override
+ protected ChangeMCP toUpsertItem(
+ @Nonnull AspectRetriever aspectRetriever,
+ Urn entityUrn,
+ AspectSpec aspectSpec,
+ Boolean createIfNotExists,
+ String jsonAspect,
+ Actor actor)
+ throws JsonProcessingException {
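+    // openapi v3 wraps each aspect payload in a top-level "value" node; unwrap it before deserializing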
+ JsonNode jsonNode = objectMapper.readTree(jsonAspect);
+ String aspectJson = jsonNode.get("value").toString();
+ return ChangeItemImpl.builder()
+ .urn(entityUrn)
+ .aspectName(aspectSpec.getName())
+ .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT)
+ .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr()))
+ .recordTemplate(
+ GenericRecordUtils.deserializeAspect(
+ ByteString.copyString(aspectJson, StandardCharsets.UTF_8),
+ GenericRecordUtils.JSON,
+ aspectSpec))
+ .build(aspectRetriever);
+ }
}
From 946b9f37450a51dd12670f4b383d6970767c4129 Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Fri, 9 Aug 2024 14:55:35 -0500
Subject: [PATCH 22/72] feat(auth): grant type and acr values custom oidc
parameters support (#11116)
---
.../app/auth/sso/oidc/OidcConfigs.java | 12 +++++++++++-
.../app/auth/sso/oidc/OidcProvider.java | 14 +++++++++++++-
datahub-frontend/conf/application.conf | 2 ++
3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java
index 753edaf89d988..080ca236630bf 100644
--- a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java
+++ b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java
@@ -41,6 +41,8 @@ public class OidcConfigs extends SsoConfigs {
public static final String OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS =
"auth.oidc.extractJwtAccessTokenClaims";
public static final String OIDC_PREFERRED_JWS_ALGORITHM = "auth.oidc.preferredJwsAlgorithm";
+ public static final String OIDC_GRANT_TYPE = "auth.oidc.grantType";
+ public static final String OIDC_ACR_VALUES = "auth.oidc.acrValues";
/** Default values */
private static final String DEFAULT_OIDC_USERNAME_CLAIM = "email";
@@ -75,7 +77,9 @@ public class OidcConfigs extends SsoConfigs {
   private final Optional<String> customParamResource;
   private final String readTimeout;
   private final Optional<Boolean> extractJwtAccessTokenClaims;
-  private Optional<String> preferredJwsAlgorithm;
+  private final Optional<String> preferredJwsAlgorithm;
+  private final Optional<String> grantType;
+  private final Optional<String> acrValues;
public OidcConfigs(Builder builder) {
super(builder);
@@ -98,6 +102,8 @@ public OidcConfigs(Builder builder) {
this.readTimeout = builder.readTimeout;
this.extractJwtAccessTokenClaims = builder.extractJwtAccessTokenClaims;
this.preferredJwsAlgorithm = builder.preferredJwsAlgorithm;
+ this.acrValues = builder.acrValues;
+ this.grantType = builder.grantType;
}
   public static class Builder extends SsoConfigs.Builder<Builder> {
@@ -123,6 +129,8 @@ public static class Builder extends SsoConfigs.Builder {
private String readTimeout = DEFAULT_OIDC_READ_TIMEOUT;
   private Optional<Boolean> extractJwtAccessTokenClaims = Optional.empty();
   private Optional<String> preferredJwsAlgorithm = Optional.empty();
+  private Optional<String> grantType = Optional.empty();
+  private Optional<String> acrValues = Optional.empty();
public Builder from(final com.typesafe.config.Config configs) {
super.from(configs);
@@ -169,6 +177,8 @@ public Builder from(final com.typesafe.config.Config configs) {
getOptional(configs, OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS).map(Boolean::parseBoolean);
preferredJwsAlgorithm =
Optional.ofNullable(getOptional(configs, OIDC_PREFERRED_JWS_ALGORITHM, null));
+ grantType = Optional.ofNullable(getOptional(configs, OIDC_GRANT_TYPE, null));
+ acrValues = Optional.ofNullable(getOptional(configs, OIDC_ACR_VALUES, null));
return this;
}
diff --git a/datahub-frontend/app/auth/sso/oidc/OidcProvider.java b/datahub-frontend/app/auth/sso/oidc/OidcProvider.java
index 39a65a46cbf91..a8a3205e8299c 100644
--- a/datahub-frontend/app/auth/sso/oidc/OidcProvider.java
+++ b/datahub-frontend/app/auth/sso/oidc/OidcProvider.java
@@ -3,6 +3,8 @@
import auth.sso.SsoProvider;
import auth.sso.oidc.custom.CustomOidcClient;
import com.google.common.collect.ImmutableMap;
+import java.util.HashMap;
+import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.pac4j.core.client.Client;
import org.pac4j.core.http.callback.PathParameterCallbackUrlResolver;
@@ -64,9 +66,19 @@ private Client createPac4jClient() {
_oidcConfigs.getResponseType().ifPresent(oidcConfiguration::setResponseType);
_oidcConfigs.getResponseMode().ifPresent(oidcConfiguration::setResponseMode);
_oidcConfigs.getUseNonce().ifPresent(oidcConfiguration::setUseNonce);
+    Map<String, String> customParamsMap = new HashMap<>();
_oidcConfigs
.getCustomParamResource()
- .ifPresent(value -> oidcConfiguration.setCustomParams(ImmutableMap.of("resource", value)));
+ .ifPresent(value -> customParamsMap.put("resource", value));
+ _oidcConfigs
+ .getGrantType()
+ .ifPresent(value -> customParamsMap.put("grant_type", value));
+ _oidcConfigs
+ .getAcrValues()
+ .ifPresent(value -> customParamsMap.put("acr_values", value));
+ if (!customParamsMap.isEmpty()) {
+ oidcConfiguration.setCustomParams(customParamsMap);
+ }
_oidcConfigs
.getPreferredJwsAlgorithm()
.ifPresent(
diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf
index dc243ecadafd8..63ff2c9166fbc 100644
--- a/datahub-frontend/conf/application.conf
+++ b/datahub-frontend/conf/application.conf
@@ -186,6 +186,8 @@ auth.oidc.customParam.resource = ${?AUTH_OIDC_CUSTOM_PARAM_RESOURCE}
auth.oidc.readTimeout = ${?AUTH_OIDC_READ_TIMEOUT}
auth.oidc.extractJwtAccessTokenClaims = ${?AUTH_OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS} # Whether to extract claims from JWT access token. Defaults to false.
auth.oidc.preferredJwsAlgorithm = ${?AUTH_OIDC_PREFERRED_JWS_ALGORITHM} # Which jws algorithm to use
+auth.oidc.acrValues = ${?AUTH_OIDC_ACR_VALUES}
+auth.oidc.grantType = ${?AUTH_OIDC_GRANT_TYPE}
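+# Example (values illustrative; ACR values are specific to your identity provider):
+#   AUTH_OIDC_GRANT_TYPE=authorization_code
+#   AUTH_OIDC_ACR_VALUES=urn:mace:incommon:iap:silver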
#
# By default, the callback URL that should be registered with the identity provider is computed as {$baseUrl}/callback/oidc.
From 4d2af40465bc26e432285999c1250f6966997124 Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Fri, 9 Aug 2024 14:56:32 -0500
Subject: [PATCH 23/72] fix(mutator): mutator hook fixes (#11140)
---
.../models/registry/ConfigEntityRegistry.java | 2 +-
.../registry/SnapshotEntityRegistry.java | 21 +++++++
.../metadata/aspect/plugins/PluginsTest.java | 36 ++++++++---
.../java/com/datahub/util/RecordUtils.java | 12 ++++
.../entity/ebean/batch/AspectsBatchImpl.java | 60 +++++++++++++++----
.../entityregistry/EntityRegistryFactory.java | 18 +++++-
6 files changed, 127 insertions(+), 22 deletions(-)
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java
index 4238c333615ec..8dd642f63dd97 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java
@@ -52,7 +52,7 @@ public class ConfigEntityRegistry implements EntityRegistry {
private final DataSchemaFactory dataSchemaFactory;
@Getter private final PluginFactory pluginFactory;
- @Nullable
+ @Getter @Nullable
   private BiFunction<PluginConfiguration, List<ClassLoader>, PluginFactory> pluginFactoryProvider;
   private final Map<String, EntitySpec> entityNameToSpec;
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
index c60f89c510cd7..16df2d452a619 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
@@ -22,6 +22,8 @@
import com.linkedin.metadata.aspect.patch.template.dataset.UpstreamLineageTemplate;
import com.linkedin.metadata.aspect.patch.template.form.FormInfoTemplate;
import com.linkedin.metadata.aspect.patch.template.structuredproperty.StructuredPropertyDefinitionTemplate;
+import com.linkedin.metadata.aspect.plugins.PluginFactory;
+import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.DefaultEntitySpec;
import com.linkedin.metadata.models.EntitySpec;
@@ -32,8 +34,11 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.function.BiFunction;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import lombok.Getter;
/**
* Implementation of {@link EntityRegistry} that builds {@link DefaultEntitySpec} objects from the a
@@ -46,6 +51,9 @@ public class SnapshotEntityRegistry implements EntityRegistry {
private final AspectTemplateEngine _aspectTemplateEngine;
   private final Map<String, AspectSpec> _aspectNameToSpec;
+  @Getter @Nullable
+  private BiFunction<PluginConfiguration, List<ClassLoader>, PluginFactory> pluginFactoryProvider;
+
private static final SnapshotEntityRegistry INSTANCE = new SnapshotEntityRegistry();
public SnapshotEntityRegistry() {
@@ -56,6 +64,19 @@ public SnapshotEntityRegistry() {
entitySpecs = new ArrayList<>(entityNameToSpec.values());
_aspectNameToSpec = populateAspectMap(entitySpecs);
_aspectTemplateEngine = populateTemplateEngine(_aspectNameToSpec);
+ pluginFactoryProvider = null;
+ }
+
+ public SnapshotEntityRegistry(
+      BiFunction<PluginConfiguration, List<ClassLoader>, PluginFactory> pluginFactoryProvider) {
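+    // mirrors the default constructor's initialization, additionally recording the plugin factory provider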
+ entityNameToSpec =
+ new EntitySpecBuilder()
+ .buildEntitySpecs(new Snapshot().schema()).stream()
+ .collect(Collectors.toMap(spec -> spec.getName().toLowerCase(), spec -> spec));
+ entitySpecs = new ArrayList<>(entityNameToSpec.values());
+ _aspectNameToSpec = populateAspectMap(entitySpecs);
+ _aspectTemplateEngine = populateTemplateEngine(_aspectNameToSpec);
+ this.pluginFactoryProvider = pluginFactoryProvider;
}
public SnapshotEntityRegistry(UnionTemplate snapshot) {
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java
index cecf21849f3aa..b98df05d721dd 100644
--- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java
+++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java
@@ -6,6 +6,7 @@
import com.datahub.test.TestEntityProfile;
import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor;
import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.EventSpec;
import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
@@ -262,23 +263,42 @@ public void testUnloadedMerge() throws EntityRegistryException {
mergedEntityRegistry.apply(configEntityRegistry2);
assertEquals(
- mergedEntityRegistry.getAllAspectPayloadValidators().stream()
- .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE"))
+ mergedEntityRegistry
+ .getPluginFactory()
+ .getPluginConfiguration()
+ .getAspectPayloadValidators()
+ .stream()
+ .filter(AspectPluginConfig::isEnabled)
+ .filter(p -> p.getSupportedOperations().contains("DELETE"))
.count(),
1);
+
assertEquals(
- mergedEntityRegistry.getAllMutationHooks().stream()
- .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE"))
+ mergedEntityRegistry.getPluginFactory().getPluginConfiguration().getMutationHooks().stream()
+ .filter(AspectPluginConfig::isEnabled)
+ .filter(p -> p.getSupportedOperations().contains("DELETE"))
.count(),
1);
+
assertEquals(
- mergedEntityRegistry.getAllMCLSideEffects().stream()
- .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE"))
+ mergedEntityRegistry
+ .getPluginFactory()
+ .getPluginConfiguration()
+ .getMclSideEffects()
+ .stream()
+ .filter(AspectPluginConfig::isEnabled)
+ .filter(p -> p.getSupportedOperations().contains("DELETE"))
.count(),
1);
+
assertEquals(
- mergedEntityRegistry.getAllMCPSideEffects().stream()
- .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE"))
+ mergedEntityRegistry
+ .getPluginFactory()
+ .getPluginConfiguration()
+ .getMcpSideEffects()
+ .stream()
+ .filter(AspectPluginConfig::isEnabled)
+ .filter(p -> p.getSupportedOperations().contains("DELETE"))
.count(),
1);
}
diff --git a/li-utils/src/main/java/com/datahub/util/RecordUtils.java b/li-utils/src/main/java/com/datahub/util/RecordUtils.java
index 8183ecc21ee27..2955943919e3b 100644
--- a/li-utils/src/main/java/com/datahub/util/RecordUtils.java
+++ b/li-utils/src/main/java/com/datahub/util/RecordUtils.java
@@ -99,6 +99,18 @@ public static T toRecordTemplate(
return toRecordTemplate(type, dataMap);
}
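+  /**
+   * Parses a JSON string into a {@link DataMap}, wrapping any parse failure in a {@link
+   * ModelConversionException}.
+   */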
+ @Nonnull
+ public static DataMap toDataMap(@Nonnull String jsonString) {
+ DataMap dataMap;
+ try {
+ dataMap = DATA_TEMPLATE_CODEC.stringToMap(jsonString);
+ } catch (IOException e) {
+ throw new ModelConversionException("Failed to deserialize DataMap: " + jsonString);
+ }
+
+ return dataMap;
+ }
+
/**
* Creates a {@link RecordTemplate} object from a {@link DataMap}.
*
diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java
index 7a1af12272ac5..0808c29e8ea89 100644
--- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java
+++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java
@@ -9,6 +9,7 @@
import com.linkedin.metadata.aspect.batch.BatchItem;
import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.aspect.batch.MCPItem;
+import com.linkedin.metadata.aspect.plugins.hooks.MutationHook;
import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.util.Pair;
@@ -47,7 +48,7 @@ public Pair