From 52e98e47e6facc9890909f7e8cd9da6e1fada55f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 21 Nov 2024 00:19:41 -0800 Subject: [PATCH] chore(ingest): always use urn creation helpers (#11911) --- metadata-ingestion/scripts/modeldocgen.py | 10 +++++++--- .../src/datahub/ingestion/source/sql/sql_common.py | 5 +++-- metadata-ingestion/tests/unit/test_sql_common.py | 12 ++++++++---- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index ee5f06cb801baa..998947e5caa954 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -14,7 +14,11 @@ import click from datahub.configuration.common import ConfigEnum, PermissiveConfigModel -from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn +from datahub.emitter.mce_builder import ( + make_data_platform_urn, + make_dataset_urn, + make_schema_field_urn, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.ingestion.api.common import PipelineContext, RecordEnvelope @@ -442,10 +446,10 @@ def strip_types(field_path: str) -> str: name=relnship_name, foreignDataset=foreign_dataset_urn, foreignFields=[ - f"urn:li:schemaField:({foreign_dataset_urn}, urn)" + make_schema_field_urn(foreign_dataset_urn, "urn") ], sourceFields=[ - f"urn:li:schemaField:({source_dataset_urn},{f_field.fieldPath})" + make_schema_field_urn(source_dataset_urn, f_field.fieldPath) ], ) foreign_keys.append(fkey) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 4352ab2f987e91..e83cc39aa94bb5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -33,6 +33,7 @@ make_data_platform_urn, make_dataplatform_instance_urn, make_dataset_urn_with_platform_instance, + make_schema_field_urn, make_tag_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -676,7 +677,7 @@ def get_foreign_key_metadata( ) source_fields = [ - f"urn:li:schemaField:({dataset_urn},{f})" + make_schema_field_urn(dataset_urn, f) for f in fk_dict["constrained_columns"] ] foreign_dataset = make_dataset_urn_with_platform_instance( @@ -686,7 +687,7 @@ def get_foreign_key_metadata( env=self.config.env, ) foreign_fields = [ - f"urn:li:schemaField:({foreign_dataset},{f})" + make_schema_field_urn(foreign_dataset, f) for f in fk_dict["referred_columns"] ] diff --git a/metadata-ingestion/tests/unit/test_sql_common.py b/metadata-ingestion/tests/unit/test_sql_common.py index a98bf641711220..cfb8f55bd977f7 100644 --- a/metadata-ingestion/tests/unit/test_sql_common.py +++ b/metadata-ingestion/tests/unit/test_sql_common.py @@ -38,7 +38,7 @@ def test_generate_foreign_key(): "referred_columns": ["test_referred_column"], # type: ignore } foreign_key = source.get_foreign_key_metadata( - dataset_urn="test_urn", + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.base_urn,PROD)", schema="test_schema", fk_dict=fk_dict, inspector=mock.Mock(), @@ -48,7 +48,9 @@ def test_generate_foreign_key(): assert [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_referred_schema.test_table,PROD),test_referred_column)" ] == foreign_key.foreignFields - assert ["urn:li:schemaField:(test_urn,test_column)"] == foreign_key.sourceFields + assert [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.base_urn,PROD),test_column)" + ] == foreign_key.sourceFields def test_use_source_schema_for_foreign_key_if_not_specified(): @@ -60,7 +62,7 @@ def test_use_source_schema_for_foreign_key_if_not_specified(): "referred_columns": ["test_referred_column"], # type: ignore } foreign_key = source.get_foreign_key_metadata( - dataset_urn="test_urn", + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.base_urn,PROD)", schema="test_schema", fk_dict=fk_dict, inspector=mock.Mock(), @@ -70,7 +72,9 @@ def test_use_source_schema_for_foreign_key_if_not_specified(): assert [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.test_table,PROD),test_referred_column)" ] == foreign_key.foreignFields - assert ["urn:li:schemaField:(test_urn,test_column)"] == foreign_key.sourceFields + assert [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.base_urn,PROD),test_column)" + ] == foreign_key.sourceFields PLATFORM_FROM_SQLALCHEMY_URI_TEST_CASES: Dict[str, str] = {