From ebbadf65ad836e550af52566d7036a791ceafbfa Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 14 Jan 2025 01:50:14 -0800 Subject: [PATCH 1/8] feat(ingest/snowflake): Support ingesting snowflake tags as structured properties (#12285) --- .../create_structured_property.py | 6 +- .../update_structured_property.py | 2 +- .../src/datahub/emitter/mcp_builder.py | 27 + .../source/snowflake/snowflake_config.py | 13 + .../source/snowflake/snowflake_schema.py | 7 +- .../source/snowflake/snowflake_schema_gen.py | 132 +- .../source/snowflake/snowflake_tag.py | 18 +- .../datahub/ingestion/source/sql/sql_utils.py | 5 + ...nowflake_structured_properties_golden.json | 4418 +++++++++++++++++ .../integration/snowflake/test_snowflake.py | 62 + .../snowflake/test_snowflake_tag.py | 48 + 11 files changed, 4708 insertions(+), 30 deletions(-) create mode 100644 metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json diff --git a/metadata-ingestion/examples/structured_properties/create_structured_property.py b/metadata-ingestion/examples/structured_properties/create_structured_property.py index e66ac3aec41221..64bc0a67812775 100644 --- a/metadata-ingestion/examples/structured_properties/create_structured_property.py +++ b/metadata-ingestion/examples/structured_properties/create_structured_property.py @@ -17,7 +17,7 @@ rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080") # first, let's make an open ended structured property that allows one text value -text_property_urn = StructuredPropertyUrn("openTextProperty") +text_property_urn = StructuredPropertyUrn("io.acryl.openTextProperty") text_property_definition = StructuredPropertyDefinitionClass( qualifiedName="io.acryl.openTextProperty", displayName="Open Text Property", @@ -39,7 +39,7 @@ # next, let's make a property that allows for multiple datahub entity urns as values # This example property could be used to reference other users or groups in datahub -urn_property_urn = 
StructuredPropertyUrn("dataSteward") +urn_property_urn = StructuredPropertyUrn("io.acryl.dataManagement.dataSteward") urn_property_definition = StructuredPropertyDefinitionClass( qualifiedName="io.acryl.dataManagement.dataSteward", displayName="Data Steward", @@ -63,7 +63,7 @@ rest_emitter.emit(event_prop_2) # finally, let's make a single select number property with a few allowed options -number_property_urn = StructuredPropertyUrn("replicationSLA") +number_property_urn = StructuredPropertyUrn("io.acryl.dataManagement.replicationSLA") number_property_definition = StructuredPropertyDefinitionClass( qualifiedName="io.acryl.dataManagement.replicationSLA", displayName="Retention Time", diff --git a/metadata-ingestion/examples/structured_properties/update_structured_property.py b/metadata-ingestion/examples/structured_properties/update_structured_property.py index 9b80ebc236d8b6..6f4b8b3be20d15 100644 --- a/metadata-ingestion/examples/structured_properties/update_structured_property.py +++ b/metadata-ingestion/examples/structured_properties/update_structured_property.py @@ -30,7 +30,7 @@ def get_emitter() -> Union[DataHubRestEmitter, DatahubKafkaEmitter]: # input your unique structured property ID -property_urn = StructuredPropertyUrn("dataSteward") +property_urn = StructuredPropertyUrn("io.acryl.dataManagement.dataSteward") with get_emitter() as emitter: for patch_mcp in ( diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index c8eb62a2e1de23..581f903d0eef0d 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -31,9 +31,12 @@ OwnershipClass, OwnershipTypeClass, StatusClass, + StructuredPropertiesClass, + StructuredPropertyValueAssignmentClass, SubTypesClass, TagAssociationClass, ) +from datahub.metadata.urns import StructuredPropertyUrn # In https://github.com/datahub-project/datahub/pull/11214, we added a # new env field 
to container properties. However, populating this field @@ -187,12 +190,31 @@ def add_tags_to_entity_wu( ).as_workunit() +def add_structured_properties_to_entity_wu( + entity_urn: str, structured_properties: Dict[StructuredPropertyUrn, str] +) -> Iterable[MetadataWorkUnit]: + aspect = StructuredPropertiesClass( + properties=[ + StructuredPropertyValueAssignmentClass( + propertyUrn=urn.urn(), + values=[value], + ) + for urn, value in structured_properties.items() + ] + ) + yield MetadataChangeProposalWrapper( + entityUrn=entity_urn, + aspect=aspect, + ).as_workunit() + + def gen_containers( container_key: KeyType, name: str, sub_types: List[str], parent_container_key: Optional[ContainerKey] = None, extra_properties: Optional[Dict[str, str]] = None, + structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None, domain_urn: Optional[str] = None, description: Optional[str] = None, owner_urn: Optional[str] = None, @@ -282,6 +304,11 @@ def gen_containers( tags=sorted(tags), ) + if structured_properties: + yield from add_structured_properties_to_entity_wu( + entity_urn=container_urn, structured_properties=structured_properties + ) + def add_dataset_to_container( container_key: KeyType, dataset_urn: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 2d61ce59857778..b14e51a982082c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -244,6 +244,11 @@ class SnowflakeV2Config( description="""Optional. Allowed values are `without_lineage`, `with_lineage`, and `skip` (default). `without_lineage` only extracts tags that have been applied directly to the given entity. `with_lineage` extracts both directly applied and propagated tags, but will be significantly slower. 
See the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/object-tagging.html#tag-lineage) for information about tag lineage/propagation. """, ) + extract_tags_as_structured_properties: bool = Field( + default=False, + description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.", + ) + include_external_url: bool = Field( default=True, description="Whether to populate Snowsight url for Snowflake Objects", @@ -263,6 +268,14 @@ class SnowflakeV2Config( description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.", ) + structured_property_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description=( + "List of regex patterns for structured properties to include in ingestion." + " Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled." + ), + ) + # This is required since access_history table does not capture whether the table was temporary table. 
temporary_tables_pattern: List[str] = Field( default=DEFAULT_TEMP_TABLES_PATTERNS, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 780effc82b0163..d165be3f3cc656 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -45,15 +45,18 @@ class SnowflakeTag: name: str value: str - def display_name(self) -> str: + def tag_display_name(self) -> str: return f"{self.name}: {self.value}" - def identifier(self) -> str: + def tag_identifier(self) -> str: return f"{self._id_prefix_as_str()}:{self.value}" def _id_prefix_as_str(self) -> str: return f"{self.database}.{self.schema}.{self.name}" + def structured_property_identifier(self) -> str: + return f"snowflake.{self.database}.{self.schema}.{self.name}" + @dataclass class SnowflakeColumn(BaseColumn): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 6f09c26b08da2d..393e4d3c96d51f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -4,12 +4,14 @@ from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mce_builder import ( + get_sys_time, make_data_platform_urn, make_dataset_urn_with_platform_instance, make_schema_field_urn, make_tag_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp_builder import add_structured_properties_to_entity_wu from datahub.ingestion.api.source import SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ( @@ -72,6 +74,7 @@ PROFILING, ) from 
datahub.metadata.com.linkedin.pegasus2avro.common import ( + AuditStamp, GlobalTags, Status, SubTypes, @@ -98,7 +101,18 @@ StringType, TimeType, ) +from datahub.metadata.com.linkedin.pegasus2avro.structured import ( + StructuredPropertyDefinition, +) from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties +from datahub.metadata.urns import ( + ContainerUrn, + DatasetUrn, + DataTypeUrn, + EntityTypeUrn, + SchemaFieldUrn, + StructuredPropertyUrn, +) from datahub.sql_parsing.sql_parsing_aggregator import ( KnownLineageMapping, SqlParsingAggregator, @@ -673,14 +687,31 @@ def _process_view( yield from self.gen_dataset_workunits(view, schema_name, db_name) def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: - tag_identifier = tag.identifier() + use_sp = self.config.extract_tags_as_structured_properties + identifier = ( + self.snowflake_identifier(tag.structured_property_identifier()) + if use_sp + else tag.tag_identifier() + ) - if self.report.is_tag_processed(tag_identifier): + if self.report.is_tag_processed(identifier): return - self.report.report_tag_processed(tag_identifier) - - yield from self.gen_tag_workunits(tag) + self.report.report_tag_processed(identifier) + if use_sp: + yield from self.gen_tag_as_structured_property_workunits(tag) + else: + yield from self.gen_tag_workunits(tag) + + def _format_tags_as_structured_properties( + self, tags: List[SnowflakeTag] + ) -> Dict[StructuredPropertyUrn, str]: + return { + StructuredPropertyUrn( + self.snowflake_identifier(tag.structured_property_identifier()) + ): tag.value + for tag in tags + } def gen_dataset_workunits( self, @@ -725,6 +756,9 @@ def gen_dataset_workunits( env=self.config.env, ) + if self.config.extract_tags_as_structured_properties: + yield from self.gen_column_tags_as_structured_properties(dataset_urn, table) + yield from add_table_to_schema_container( dataset_urn=dataset_urn, parent_container_key=schema_container_key, @@ -758,16 +792,24 @@ def 
gen_dataset_workunits( ) if table.tags: - tag_associations = [ - TagAssociation( - tag=make_tag_urn(self.snowflake_identifier(tag.identifier())) + if self.config.extract_tags_as_structured_properties: + yield from add_structured_properties_to_entity_wu( + dataset_urn, + self._format_tags_as_structured_properties(table.tags), ) - for tag in table.tags - ] - global_tags = GlobalTags(tag_associations) - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=global_tags - ).as_workunit() + else: + tag_associations = [ + TagAssociation( + tag=make_tag_urn( + self.snowflake_identifier(tag.tag_identifier()) + ) + ) + for tag in table.tags + ] + global_tags = GlobalTags(tag_associations) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=global_tags + ).as_workunit() if isinstance(table, SnowflakeView) and table.view_definition is not None: view_properties_aspect = ViewProperties( @@ -840,10 +882,10 @@ def get_dataset_properties( ) def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: - tag_urn = make_tag_urn(self.snowflake_identifier(tag.identifier())) + tag_urn = make_tag_urn(self.snowflake_identifier(tag.tag_identifier())) tag_properties_aspect = TagProperties( - name=tag.display_name(), + name=tag.tag_display_name(), description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.", ) @@ -851,6 +893,41 @@ def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: entityUrn=tag_urn, aspect=tag_properties_aspect ).as_workunit() + def gen_tag_as_structured_property_workunits( + self, tag: SnowflakeTag + ) -> Iterable[MetadataWorkUnit]: + identifier = self.snowflake_identifier(tag.structured_property_identifier()) + urn = StructuredPropertyUrn(identifier).urn() + aspect = StructuredPropertyDefinition( + qualifiedName=identifier, + displayName=tag.name, + valueType=DataTypeUrn("datahub.string").urn(), + entityTypes=[ + 
EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(), + EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(), + EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(), + ], + lastModified=AuditStamp( + time=get_sys_time(), actor="urn:li:corpuser:datahub" + ), + ) + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=aspect, + ).as_workunit() + + def gen_column_tags_as_structured_properties( + self, dataset_urn: str, table: Union[SnowflakeTable, SnowflakeView] + ) -> Iterable[MetadataWorkUnit]: + for column_name in table.column_tags: + schema_field_urn = SchemaFieldUrn(dataset_urn, column_name).urn() + yield from add_structured_properties_to_entity_wu( + schema_field_urn, + self._format_tags_as_structured_properties( + table.column_tags[column_name] + ), + ) + def gen_schema_metadata( self, table: Union[SnowflakeTable, SnowflakeView], @@ -892,13 +969,14 @@ def gen_schema_metadata( [ TagAssociation( make_tag_urn( - self.snowflake_identifier(tag.identifier()) + self.snowflake_identifier(tag.tag_identifier()) ) ) for tag in table.column_tags[col.name] ] ) if col.name in table.column_tags + and not self.config.extract_tags_as_structured_properties else None ), ) @@ -985,8 +1063,17 @@ def gen_database_containers( ) ), tags=( - [self.snowflake_identifier(tag.identifier()) for tag in database.tags] + [ + self.snowflake_identifier(tag.tag_identifier()) + for tag in database.tags + ] if database.tags + and not self.config.extract_tags_as_structured_properties + else None + ), + structured_properties=( + self._format_tags_as_structured_properties(database.tags) + if database.tags and self.config.extract_tags_as_structured_properties else None ), ) @@ -1038,8 +1125,13 @@ def gen_schema_containers( else None ), tags=( - [self.snowflake_identifier(tag.identifier()) for tag in schema.tags] - if schema.tags + [self.snowflake_identifier(tag.tag_identifier()) for tag in schema.tags] + if schema.tags and not self.config.extract_tags_as_structured_properties 
+ else None + ), + structured_properties=( + self._format_tags_as_structured_properties(schema.tags) + if schema.tags and self.config.extract_tags_as_structured_properties else None ), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py index be449e963d270b..75567cc3da8830 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py @@ -165,10 +165,20 @@ def _filter_tags( allowed_tags = [] for tag in tags: - tag_identifier = tag.identifier() - self.report.report_entity_scanned(tag_identifier, "tag") - if not self.config.tag_pattern.allowed(tag_identifier): - self.report.report_dropped(tag_identifier) + identifier = ( + tag._id_prefix_as_str() + if self.config.extract_tags_as_structured_properties + else tag.tag_identifier() + ) + self.report.report_entity_scanned(identifier, "tag") + + pattern = ( + self.config.structured_property_pattern + if self.config.extract_tags_as_structured_properties + else self.config.tag_pattern + ) + if not pattern.allowed(identifier): + self.report.report_dropped(identifier) else: allowed_tags.append(tag) return allowed_tags diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index f45147223b8881..1545de0fff796f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -20,6 +20,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.metadata.schema_classes import DataPlatformInstanceClass +from datahub.metadata.urns import StructuredPropertyUrn from datahub.utilities.registries.domain_registry import DomainRegistry from 
datahub.utilities.urns.dataset_urn import DatasetUrn @@ -75,6 +76,7 @@ def gen_schema_container( created: Optional[int] = None, last_modified: Optional[int] = None, extra_properties: Optional[Dict[str, str]] = None, + structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None, ) -> Iterable[MetadataWorkUnit]: domain_urn: Optional[str] = None if domain_registry: @@ -99,6 +101,7 @@ def gen_schema_container( owner_urn=owner_urn, qualified_name=qualified_name, extra_properties=extra_properties, + structured_properties=structured_properties, ) @@ -133,6 +136,7 @@ def gen_database_container( created: Optional[int] = None, last_modified: Optional[int] = None, extra_properties: Optional[Dict[str, str]] = None, + structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None, ) -> Iterable[MetadataWorkUnit]: domain_urn: Optional[str] = None if domain_registry: @@ -154,6 +158,7 @@ def gen_database_container( owner_urn=owner_urn, qualified_name=qualified_name, extra_properties=extra_properties, + structured_properties=structured_properties, ) diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json new file mode 100644 index 00000000000000..3adedd59070396 --- /dev/null +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json @@ -0,0 +1,4418 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "snowflake", + "env": "PROD", + "database": "test_db" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/", + "name": "TEST_DB", + "description": "Comment for TEST_DB", + "env": "PROD", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 
1623135600000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "values": [ + { + "string": "other" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + 
"json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.other_db.other_schema.my_other_tag", + "displayName": "my_other_tag", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "snowflake", + "env": "PROD", + "database": "test_db", + "schema": "test_schema" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/", + "name": "TEST_SCHEMA", + "description": "comment for TEST_DB.TEST_SCHEMA", + "env": "PROD", + "created": { + "time": 1623135600000 + }, + 
"lastModified": { + "time": 1623135600000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "values": [ + { + "string": "other" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": 
"browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_1", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", + "name": "TABLE_1", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + 
"urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_2", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": 
"Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": 
"datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", + "name": "TABLE_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_3", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + 
"externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", + "name": "TABLE_3", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_4", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + 
"description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", 
+ "name": "TABLE_4", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": 
"UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_5", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/", + "name": "TABLE_5", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_5", + "description": "Comment for Table", + "created": { + 
"time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_6", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + 
"description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", + "name": "TABLE_6", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { 
+ "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_7", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", + "name": "TABLE_7", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, 
+{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + 
"changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_8", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": 
"Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", + "name": "TABLE_8", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + 
"changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_9", + 
"platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + 
"recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", + "name": "TABLE_9", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_10", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + 
"actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": 
false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", + "name": "TABLE_10", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.security", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.security", + "displayName": "security", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.view_1", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for 
column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "IS_SECURE": "true" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/", + "name": "VIEW_1", + "qualifiedName": 
"TEST_DB.TEST_SCHEMA.VIEW_1", + "description": "Comment for View", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "schemaField", + "entityUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),COL_1)", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.security", + "values": [ + { + "string": "pii" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view 
view_1 as select * from table_1", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_0", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.my_tag_0", + "displayName": "my_tag_0", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_1", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.my_tag_1", + "displayName": "my_tag_1", + 
"valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_2", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.my_tag_2", + "displayName": "my_tag_2", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.view_2", + "platform": 
"urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": 
false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/", + "name": "VIEW_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_2", + "description": "Comment for View", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_0", + "values": [ + { + "string": "my_value_0" + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_1", + "values": [ + { + "string": "my_value_1" + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_2", + "values": [ + { + "string": "my_value_2" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_2 as select * from table_2", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + 
"aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)" + ], + 
"confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)" + ], + "confidenceScore": 0.9, + 
"query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)" + ], + "confidenceScore": 0.9, + "query": 
"urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create view view_1 as select * from table_1", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1736285939366, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)" + }, + 
{ + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create view view_2 as select * from table_2", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1736285939376, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "schemaField", + 
"entityUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),COL_1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_0", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": 
"urn:li:structuredProperty:snowflake.test_db.test_schema.security", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index ef4918a20e640c..d2e20e784282ee 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -185,6 +185,68 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): assert cache_info["get_fk_constraints_for_schema"]["misses"] == 1 +def test_snowflake_tags_as_structured_properties( + pytestconfig, tmp_path, mock_time, mock_datahub_graph +): + test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" + + # Run the metadata ingestion pipeline. 
+ output_file = tmp_path / "snowflake_structured_properties_test_events.json" + golden_file = test_resources_dir / "snowflake_structured_properties_golden.json" + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + sf_cursor.execute.side_effect = default_query_results + + pipeline = Pipeline( + config=PipelineConfig( + source=SourceConfig( + type="snowflake", + config=SnowflakeV2Config( + extract_tags_as_structured_properties=True, + extract_tags=TagOption.without_lineage, + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), + include_technical_schema=True, + include_table_lineage=False, + include_column_lineage=False, + include_usage_stats=False, + include_operational_stats=False, + ), + ), + sink=DynamicTypedConfig( + type="file", config={"filename": str(output_file)} + ), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + assert not pipeline.source.get_report().warnings + + # Verify the output. 
+ + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_file, + golden_path=golden_file, + ignore_paths=[ + r"root\[\d+\]\['aspect'\]\['json'\]\['timestampMillis'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['created'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['lastModified'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['fields'\]\[\d+\]\['glossaryTerms'\]\['auditStamp'\]\['time'\]", + r"root\[\d+\]\['systemMetadata'\]", + ], + ) + + @freeze_time(FROZEN_TIME) def test_snowflake_private_link_and_incremental_mcps( pytestconfig, tmp_path, mock_time, mock_datahub_graph diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py index 9bb598cb0c1c7f..d4f6e92c93c1e0 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py @@ -98,3 +98,51 @@ def test_snowflake_tag_pattern_deny(): "TEST_DB.TEST_SCHEMA.my_tag_1:my_value_1", "TEST_DB.TEST_SCHEMA.security:pii", } + + +def test_snowflake_structured_property_pattern_deny(): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + sf_cursor.execute.side_effect = default_query_results + + tag_config = SnowflakeV2Config( + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), + extract_tags_as_structured_properties=True, + tag_pattern=AllowDenyPattern( + deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"] + ), + structured_property_pattern=AllowDenyPattern( + deny=["TEST_DB.TEST_SCHEMA.my_tag_[0-9]"] + ), + include_technical_schema=True, + include_table_lineage=False, + include_column_lineage=False, + include_usage_stats=False, + 
include_operational_stats=False, + extract_tags=TagOption.without_lineage, + ) + + pipeline = Pipeline( + config=PipelineConfig( + source=SourceConfig(type="snowflake", config=tag_config), + sink=DynamicTypedConfig(type="blackhole", config={}), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + + source_report = pipeline.source.get_report() + assert isinstance(source_report, SnowflakeV2Report) + assert source_report.tags_scanned == 5 + assert source_report._processed_tags == { + "snowflake.other_db.other_schema.my_other_tag", + "snowflake.test_db.test_schema.security", + } From 4633fbfeca963aa7f2fdb49c879446170882c865 Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Tue, 14 Jan 2025 11:09:08 -0800 Subject: [PATCH 2/8] fix(ingestion) fix snappy inconsistent version in ingestion (#12321) --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 284092e2b14f49..e4fd70a99e6434 100644 --- a/build.gradle +++ b/build.gradle @@ -195,7 +195,7 @@ project.ext.externalDependency = [ 'kafkaAvroSerde': "io.confluent:kafka-streams-avro-serde:$kafkaVersion", 'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4', 'kafkaClients': "org.apache.kafka:kafka-clients:$kafkaVersion-ccs", - 'snappy': 'org.xerial.snappy:snappy-java:1.1.10.4', + 'snappy': 'org.xerial.snappy:snappy-java:1.1.10.5', 'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic", 'logbackClassicJava8' : "ch.qos.logback:logback-classic:$logbackClassicJava8", 'slf4jApi': "org.slf4j:slf4j-api:$slf4jVersion", From e1d57e3f213bef8ba863426c0fbd2de098f6c6f6 Mon Sep 17 00:00:00 2001 From: kevinkarchacryl Date: Tue, 14 Jan 2025 15:53:51 -0500 Subject: [PATCH 3/8] Super type dbt redshift (#12337) --- .../ingestion/source/redshift/redshift.py | 1 + .../datahub/ingestion/source/sql/sql_types.py | 2 +- .../tests/unit/test_dbt_source.py | 34 ++++++++++++++++++- 3 files changed, 35 insertions(+), 2 deletions(-) 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 5371017a2a3212..9bfca941ce48fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -276,6 +276,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): "HLLSKETCH": NullType, "TIMETZ": TimeType, "VARBYTE": StringType, + "SUPER": NullType, } def get_platform_instance_id(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 9ec73a9af96dc5..1acf962d7c4750 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -93,7 +93,7 @@ "regtype": None, "regrole": None, "regnamespace": None, - "super": None, + "super": NullType, "uuid": StringType, "pg_lsn": None, "tsvector": None, # text search vector diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 0a869297837014..ff22ffedc6228f 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -9,7 +9,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.dbt import dbt_cloud from datahub.ingestion.source.dbt.dbt_cloud import DBTCloudConfig -from datahub.ingestion.source.dbt.dbt_common import DBTNode +from datahub.ingestion.source.dbt.dbt_common import ( + DBTNode, + DBTSourceReport, + NullTypeClass, + get_column_type, +) from datahub.ingestion.source.dbt.dbt_core import ( DBTCoreConfig, DBTCoreSource, @@ -461,3 +466,30 @@ def test_dbt_time_parsing() -> None: assert timestamp.tzinfo is not None and timestamp.tzinfo.utcoffset( timestamp ) == timedelta(0) + + +def 
test_get_column_type_redshift(): + report = DBTSourceReport() + dataset_name = "test_dataset" + + # Test 'super' type which should not show any warnings/errors + result_super = get_column_type(report, dataset_name, "super", "redshift") + assert isinstance(result_super.type, NullTypeClass) + assert ( + len(report.infos) == 0 + ), "No warnings should be generated for known SUPER type" + + # Test unknown type, which generates a warning but resolves to NullTypeClass + unknown_type = "unknown_type" + result_unknown = get_column_type(report, dataset_name, unknown_type, "redshift") + assert isinstance(result_unknown.type, NullTypeClass) + + # exact warning message for an unknown type + expected_context = f"{dataset_name} - {unknown_type}" + messages = [info for info in report.infos if expected_context in str(info.context)] + assert len(messages) == 1 + assert messages[0].title == "Unable to map column types to DataHub types" + assert ( + messages[0].message + == "Got an unexpected column type. The column's parsed field type will not be populated." 
+ ) From 90fe5b6cb71a953042ebba33af6d6431e6ae0046 Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Tue, 14 Jan 2025 14:26:59 -0800 Subject: [PATCH 4/8] fix(docker) add new gradle profile for consumer debug purpose (#12338) --- docker/build.gradle | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/build.gradle b/docker/build.gradle index 7b36c0d9acdcf0..576e47a53e6ef5 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,6 +42,15 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], + + 'quickstartDebugConsumers': [ + profile: 'debug-consumers', + modules: python_services_modules + backend_profile_modules + [':datahub-frontend', + ':metadata-jobs:mce-consumer-job', + ':metadata-jobs:mae-consumer-job'], + isDebug: true + ], + 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ From 94b9da0bd8d9c04a5566a3c731f2f5418fc3eb0a Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 14 Jan 2025 17:28:34 -0600 Subject: [PATCH 5/8] feat(entityVersioning): initial implementation (#12166) --- .../datahub/graphql/GmsGraphQLEngine.java | 15 + .../datahub/graphql/GmsGraphQLEngineArgs.java | 2 + .../versioning/LinkAssetVersionResolver.java | 88 +++ .../UnlinkAssetVersionResolver.java | 67 ++ .../src/main/resources/entity.graphql | 60 ++ .../LinkAssetVersionResolverTest.java | 102 +++ .../UnlinkAssetVersionResolverTest.java | 123 ++++ docker/profiles/docker-compose.gms.yml | 8 + .../metadata/aspect/AspectRetriever.java | 16 +- .../aspect/CachingAspectRetriever.java | 6 + .../linkedin/metadata/aspect/ReadItem.java | 1 + .../patch/template/AspectTemplateEngine.java | 20 +- .../common/VersionPropertiesTemplate.java | 44 ++ .../metadata/entity/SearchRetriever.java | 52 +- .../registry/SnapshotEntityRegistry.java | 2 + .../metadata/aspect/MockAspectRetriever.java | 19 +- .../java/com/linkedin/metadata/Constants.java | 13 + 
metadata-io/build.gradle | 1 + .../client/EntityClientAspectRetriever.java | 19 + .../entity/EntityServiceAspectRetriever.java | 12 + .../AlphanumericSortIdGenerator.java | 70 ++ .../EntityVersioningServiceImpl.java | 356 +++++++++++ .../sideeffects/VersionSetSideEffect.java | 137 ++++ .../VersionPropertiesValidator.java | 158 +++++ .../VersionSetPropertiesValidator.java | 80 +++ .../search/SearchServiceSearchRetriever.java | 31 +- .../SearchDocumentTransformer.java | 2 +- .../metadata/search/utils/ESUtils.java | 36 ++ .../service/UpdateIndicesService.java | 3 +- .../AlphanumericSortIdGeneratorTest.java | 62 ++ .../EntityVersioningServiceTest.java | 603 ++++++++++++++++++ .../sideeffects/VersionSetSideEffectTest.java | 229 +++++++ .../VersionPropertiesValidatorTest.java | 165 +++++ .../VersionSetPropertiesValidatorTest.java | 139 ++++ .../AutocompleteRequestHandlerTest.java | 156 +++++ .../request/SearchRequestHandlerTest.java | 244 ++++++- .../SearchDocumentTransformerTest.java | 19 + .../com/linkedin/common/VersionProperties.pdl | 77 +++ .../com/linkedin/common/VersionTag.pdl | 1 + .../linkedin/metadata/key/VersionSetKey.pdl | 20 + .../linkedin/metadata/query/SearchFlags.pdl | 5 + .../versionset/VersionSetProperties.pdl | 24 + .../src/main/resources/entity-registry.yml | 7 + .../graphql/featureflags/FeatureFlags.java | 1 + .../src/main/resources/application.yaml | 1 + .../EntityVersioningServiceFactory.java | 21 + .../factory/graphql/GraphQLEngineFactory.java | 5 +- .../SpringStandardPluginConfiguration.java | 67 ++ .../delegates/DatahubUsageEventsImplTest.java | 4 + .../delegates/EntityApiDelegateImplTest.java | 3 + .../GlobalControllerExceptionHandler.java | 24 + .../openapi/config/SpringWebConfig.java | 7 +- .../openapi/v3/OpenAPIV3Generator.java | 243 +++++-- .../v3/controller/EntityController.java | 124 +++- .../openapi/v3/OpenAPIV3GeneratorTest.java | 6 +- .../v3/controller/EntityControllerTest.java | 218 ++++++- 
.../com.linkedin.entity.aspects.snapshot.json | 4 + ...com.linkedin.entity.entities.snapshot.json | 10 + .../com.linkedin.entity.runs.snapshot.json | 4 + ...nkedin.operations.operations.snapshot.json | 4 + ...m.linkedin.platform.platform.snapshot.json | 4 + .../versioning/EntityVersioningService.java | 36 ++ .../versioning/VersionPropertiesInput.java | 20 + .../metadata/search/utils/QueryUtils.java | 8 + .../authorization/PoliciesConfig.java | 11 +- .../tests/entity_versioning/__init__.py | 0 .../entity_versioning/test_versioning.py | 64 ++ test-models/build.gradle | 1 + 68 files changed, 4063 insertions(+), 121 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffect.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidator.java create mode 100644 
metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl rename {li-utils => metadata-models}/src/main/pegasus/com/linkedin/common/VersionTag.pdl (78%) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java create mode 100644 smoke-test/tests/entity_versioning/__init__.py create mode 100644 smoke-test/tests/entity_versioning/test_versioning.py diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3c46c1a8dce35c..b15db80a8487ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -174,6 +174,8 @@ import 
com.linkedin.datahub.graphql.resolvers.embed.UpdateEmbedResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityExistsResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityPrivilegesResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.LinkAssetVersionResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.UnlinkAssetVersionResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchAssignFormResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchRemoveFormResolver; import com.linkedin.datahub.graphql.resolvers.form.CreateDynamicFormAssignmentResolver; @@ -391,6 +393,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -476,6 +479,7 @@ public class GmsGraphQLEngine { private final RestrictedService restrictedService; private ConnectionService connectionService; private AssertionService assertionService; + private final EntityVersioningService entityVersioningService; private final BusinessAttributeService businessAttributeService; private final FeatureFlags featureFlags; @@ -599,6 +603,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.restrictedService = args.restrictedService; this.connectionService = args.connectionService; this.assertionService = args.assertionService; + this.entityVersioningService = args.entityVersioningService; this.businessAttributeService = args.businessAttributeService; this.ingestionConfiguration = Objects.requireNonNull(args.ingestionConfiguration); @@ -1392,6 +1397,16 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { 
"removeBusinessAttribute", new RemoveBusinessAttributeResolver(this.entityService)); } + if (featureFlags.isEntityVersioning()) { + typeWiring + .dataFetcher( + "linkAssetVersion", + new LinkAssetVersionResolver(this.entityVersioningService, this.featureFlags)) + .dataFetcher( + "unlinkAssetVersion", + new UnlinkAssetVersionResolver( + this.entityVersioningService, this.featureFlags)); + } return typeWiring; }); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index f6ab3a603dbb7b..131f4e87637807 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -88,6 +89,7 @@ public class GmsGraphQLEngineArgs { BusinessAttributeService businessAttributeService; ConnectionService connectionService; AssertionService assertionService; + EntityVersioningService entityVersioningService; // any fork specific args should go below this line } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java new file mode 100644 index 00000000000000..69e049af1e87b7 --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -0,0 +1,88 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.apache.commons.lang.StringUtils; + +/** + * Currently only supports linking the latest version, but may be modified later to support inserts + */ +public class LinkAssetVersionResolver implements DataFetcher<CompletableFuture<String>> { + + private final EntityVersioningService entityVersioningService; + private final FeatureFlags featureFlags; + + public LinkAssetVersionResolver( + EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { + this.entityVersioningService = entityVersioningService; + this.featureFlags = featureFlags; + } + + @Override + public CompletableFuture<String> get(DataFetchingEnvironment environment) throws Exception { +
final QueryContext context = environment.getContext(); + final LinkVersionInput input = + bindArgument(environment.getArgument("input"), LinkVersionInput.class); + if (!featureFlags.isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet()); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet())); + } + Urn entityUrn = UrnUtils.getUrn(input.getLinkedEntity()); + OperationContext opContext = context.getOperationContext(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new AuthorizationException( + String.format( + "%s is unauthorized to %s entities %s and %s", + opContext.getAuthentication().getActor().toUrnStr(), + UPDATE, + input.getVersionSet(), + input.getLinkedEntity())); + } + VersionPropertiesInput versionPropertiesInput = + new VersionPropertiesInput( + input.getComment(), + input.getVersion(), + input.getSourceTimestamp(), + input.getSourceCreator()); + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + List<IngestResult> linkResults = + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput); + + return linkResults.stream() + .filter( + ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString())) + .map(ingestResult -> ingestResult.getUrn().toString()) + .findAny() + .orElse(StringUtils.EMPTY); + }, + this.getClass().getSimpleName(), + "get"); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java new file mode 100644
index 00000000000000..3d5027a0d668ac --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java @@ -0,0 +1,67 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.concurrent.CompletableFuture; + +public class UnlinkAssetVersionResolver implements DataFetcher<CompletableFuture<Boolean>> { + + private final EntityVersioningService entityVersioningService; + private final FeatureFlags featureFlags; + + public UnlinkAssetVersionResolver( + EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { + this.entityVersioningService = entityVersioningService; + this.featureFlags = featureFlags; + } + + @Override + public CompletableFuture<Boolean> get(DataFetchingEnvironment environment) throws Exception { + if (!featureFlags.isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + final QueryContext context = environment.getContext(); + final
UnlinkVersionInput input = + bindArgument(environment.getArgument("input"), UnlinkVersionInput.class); + Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet()); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet())); + } + Urn entityUrn = UrnUtils.getUrn(input.getUnlinkedEntity()); + OperationContext opContext = context.getOperationContext(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new AuthorizationException( + String.format( + "%s is unauthorized to %s entities %s and %s", + opContext.getAuthentication().getActor(), + UPDATE, + input.getVersionSet(), + input.getUnlinkedEntity())); + } + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + return true; + }, + this.getClass().getSimpleName(), + "get"); + } +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 9dd1948e18e042..b47be7ae32b2c4 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,6 +956,16 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean + + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): String + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -12911,6 +12921,56 @@ input ListBusinessAttributesInput { query: String } +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! 
+ + """ + Version Tag label for the version, should be unique within a Version Set + """ + version: String! + + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..690856263fccc5 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetchingEnvironment; +import 
org.mockito.Mockito; +import org.testng.annotations.Test; + +public class LinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + IngestResult mockResult = + IngestResult.builder().urn(Urn.createFromString(TEST_ENTITY_URN)).build(); + + Mockito.when( + mockService.linkLatestVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + any(VersionPropertiesInput.class))) + .thenReturn(ImmutableList.of(mockResult)); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + input.setComment("Test comment"); + input.setVersion("v1"); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + String result = resolver.get(mockEnv).get(); + assertEquals(result, TEST_ENTITY_URN); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute 
resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..0000ad24a04537 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -0,0 +1,123 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import 
com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class UnlinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.when( + mockService.unlinkVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)))) + .thenReturn(Collections.emptyList()); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService) + .unlinkVersion( + any(), eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), eq(UrnUtils.getUrn(TEST_ENTITY_URN))); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + 
EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetServiceException() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.doThrow(new RuntimeException("Service error")) + .when(mockService) + .unlinkVersion(any(), any(), any()); + + UnlinkAssetVersionResolver resolver = new 
UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 64163ef970080a..ada7df51e20bef 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -67,6 +67,7 @@ x-datahub-system-update-service: &datahub-system-update-service SCHEMA_REGISTRY_SYSTEM_UPDATE: ${SCHEMA_REGISTRY_SYSTEM_UPDATE:-true} SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS: ${SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS:-true} SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION: ${SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins @@ -80,6 +81,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev SKIP_ELASTICSEARCH_CHECK: false REPROCESS_DEFAULT_BROWSE_PATHS_V2: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003' + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ../../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/ - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources @@ -101,6 +103,7 @@ x-datahub-gms-service: &datahub-gms-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] 
ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s @@ -131,6 +134,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev SEARCH_SERVICE_ENABLE_CACHE: false LINEAGE_SEARCH_CACHE_ENABLED: false SHOW_BROWSE_V2: true + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml @@ -155,12 +159,14 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service - ${DATAHUB_LOCAL_MAE_ENV:-empty2.env} environment: &datahub-mae-consumer-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh - ../../metadata-models/src/main/resources/:/datahub/datahub-mae-consumer/resources @@ -183,12 +189,14 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev 
<<: *datahub-mce-consumer-service image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh - ../../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java index e34df7db481189..87939e14bfde68 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java @@ -5,11 +5,9 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.util.Pair; import java.util.Collections; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -50,19 +48,7 @@ default SystemAspect getLatestSystemAspect( Map> getLatestSystemAspects(Map> urnAspectNames); @Nonnull - default Map entityExists(Set urns) { - Set keyAspectNames = - urns.stream() - .map(Urn::getEntityType) - .distinct() - .map(entityType -> getEntityRegistry().getEntitySpec(entityType).getKeyAspectName()) - .collect(Collectors.toSet()); - - Map> latest = getLatestAspectObjects(urns, keyAspectNames); - return urns.stream() - .map(urn -> Pair.of(urn, latest.containsKey(urn))) - .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); - } + Map entityExists(Set urns); @Nonnull EntityRegistry getEntityRegistry(); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java 
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java index 375dd8cf8911e1..7b3233921d039e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -29,6 +29,12 @@ public Map> getLatestSystemAspects( return Collections.emptyMap(); } + @Nonnull + @Override + public Map entityExists(Set urns) { + return Collections.emptyMap(); + } + @Nonnull @Override public EntityRegistry getEntityRegistry() { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java index 106596bf80ccf0..341dec4d4741c7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java @@ -36,6 +36,7 @@ default String getAspectName() { @Nullable RecordTemplate getRecordTemplate(); + @Nullable default T getAspect(Class clazz) { return getAspect(clazz, getRecordTemplate()); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java index ce36b7e77a2b16..821dad13aa0c3c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java @@ -1,20 +1,6 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.linkedin.metadata.Constants.CHART_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DASHBOARD_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATASET_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_FLOW_INFO_ASPECT_NAME; -import static 
com.linkedin.metadata.Constants.DATA_JOB_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PRODUCT_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; -import static com.linkedin.metadata.Constants.FORM_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.GLOSSARY_TERMS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.OWNERSHIP_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.UPSTREAM_LINEAGE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.linkedin.data.template.RecordTemplate; @@ -50,7 +36,9 @@ public class AspectTemplateEngine { DASHBOARD_INFO_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, - FORM_INFO_ASPECT_NAME) + FORM_INFO_ASPECT_NAME, + UPSTREAM_LINEAGE_ASPECT_NAME, + VERSION_PROPERTIES_ASPECT_NAME) .collect(Collectors.toSet()); private final Map> _aspectTemplateMap; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java new file mode 100644 index 00000000000000..2f7d24e2cdb4b7 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java @@ -0,0 +1,44 @@ +package com.linkedin.metadata.aspect.patch.template.common; + +import com.fasterxml.jackson.databind.JsonNode; +import com.linkedin.common.VersionProperties; +import 
com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.patch.template.Template; +import javax.annotation.Nonnull; + +public class VersionPropertiesTemplate implements Template { + + public static final String IS_LATEST_FIELD = "isLatest"; + + @Override + public VersionProperties getSubtype(RecordTemplate recordTemplate) throws ClassCastException { + if (recordTemplate instanceof VersionProperties) { + return (VersionProperties) recordTemplate; + } + throw new ClassCastException("Unable to cast RecordTemplate to VersionProperties"); + } + + @Override + public Class getTemplateType() { + return VersionProperties.class; + } + + @Nonnull + @Override + public VersionProperties getDefault() { + throw new UnsupportedOperationException( + "Unable to generate default version properties, no sensible default for " + "version set."); + } + + @Nonnull + @Override + public JsonNode transformFields(JsonNode baseNode) { + return baseNode; + } + + @Nonnull + @Override + public JsonNode rebaseFields(JsonNode patched) { + return patched; + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java index d4894c97015f8f..19dc89d26cb1af 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -1,6 +1,10 @@ package com.linkedin.metadata.entity; +import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntityArray; import java.util.List; @@ -8,6 +12,40 @@ import javax.annotation.Nullable; public interface SearchRetriever { + + 
SearchFlags RETRIEVER_SEARCH_FLAGS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(false) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false); + + SearchFlags RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(true) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false) + .setFilterNonLatestVersions(false); + + /** + * Scrolls with a caller-supplied sort and search flags. Should only be used when the specified + * sort is unique; otherwise prefer the overload below, which defaults to sorting by urn. + */ + ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags); + /** * Returns search results for the given entities, filtered and sorted. * @@ -17,11 +55,17 @@ public interface SearchRetriever { * @param count size of a page * @return result of the search */ - ScrollResult scroll( + default ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count); + int count) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + return scroll( + entities, filters, scrollId, count, ImmutableList.of(urnSort), RETRIEVER_SEARCH_FLAGS); + } SearchRetriever EMPTY = new EmptySearchRetriever(); @@ -32,7 +76,9 @@ public ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count) { + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags) { ScrollResult empty = new ScrollResult(); empty.setEntities(new SearchEntityArray()); empty.setNumEntities(0); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java index 16df2d452a619e..f4d6799bb476f5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java @@ -12,6 +12,7 @@ import com.linkedin.metadata.aspect.patch.template.common.GlossaryTermsTemplate; import com.linkedin.metadata.aspect.patch.template.common.OwnershipTemplate; import com.linkedin.metadata.aspect.patch.template.common.StructuredPropertiesTemplate; +import com.linkedin.metadata.aspect.patch.template.common.VersionPropertiesTemplate; import com.linkedin.metadata.aspect.patch.template.dashboard.DashboardInfoTemplate; import com.linkedin.metadata.aspect.patch.template.dataflow.DataFlowInfoTemplate; import com.linkedin.metadata.aspect.patch.template.datajob.DataJobInfoTemplate; @@ -113,6 +114,7 @@ private AspectTemplateEngine populateTemplateEngine(Map aspe aspectSpecTemplateMap.put( STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, new StructuredPropertyDefinitionTemplate()); aspectSpecTemplateMap.put(FORM_INFO_ASPECT_NAME, new FormInfoTemplate()); + aspectSpecTemplateMap.put(VERSION_PROPERTIES_ASPECT_NAME, new VersionPropertiesTemplate()); return new AspectTemplateEngine(aspectSpecTemplateMap); } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 98a6d59004a92a..15f168f74a32df 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -20,11 +20,14 @@ import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; import 
org.mockito.Mockito; public class MockAspectRetriever implements CachingAspectRetriever { private final Map> data; private final Map> systemData = new HashMap<>(); + @Getter @Setter private EntityRegistry entityRegistry; public MockAspectRetriever(@Nonnull Map> data) { this.data = @@ -60,6 +63,7 @@ public MockAspectRetriever(@Nonnull Map> data) { .build()); } } + this.entityRegistry = Mockito.mock(EntityRegistry.class); } public MockAspectRetriever( @@ -71,6 +75,15 @@ public MockAspectRetriever(Urn propertyUrn, StructuredPropertyDefinition definit this(Map.of(propertyUrn, List.of(definition))); } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream().collect(Collectors.toMap(urn -> urn, data::containsKey)); + } + } + @Nonnull @Override public Map> getLatestAspectObjects( @@ -90,10 +103,4 @@ public Map> getLatestSystemAspects( .map(urn -> Pair.of(urn, systemData.get(urn))) .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } - - @Nonnull - @Override - public EntityRegistry getEntityRegistry() { - return Mockito.mock(EntityRegistry.class); - } } diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 01c33a2530efb5..463376edcdf259 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -128,6 +128,7 @@ public class Constants { public static final String INCIDENTS_SUMMARY_ASPECT_NAME = "incidentsSummary"; public static final String DOCUMENTATION_ASPECT_NAME = "documentation"; public static final String DATA_TRANSFORM_LOGIC_ASPECT_NAME = "dataTransformLogic"; + public static final String VERSION_PROPERTIES_ASPECT_NAME = "versionProperties"; // User public static final String CORP_USER_KEY_ASPECT_NAME = "corpUserKey"; @@ -464,6 +465,18 @@ public class Constants { // Incidents public static final String ENTITY_REF = 
"entities"; + // Version Set + public static final String VERSION_SET_ENTITY_NAME = "versionSet"; + public static final String VERSION_SET_KEY_ASPECT_NAME = "versionSetKey"; + public static final String VERSION_SET_PROPERTIES_ASPECT_NAME = "versionSetProperties"; + + // Versioning related + public static final String INITIAL_VERSION_SORT_ID = "AAAAAAAA"; + public static final String VERSION_SORT_ID_FIELD_NAME = "versionSortId"; + public static final String IS_LATEST_FIELD_NAME = "isLatest"; + + public static final String DISPLAY_PROPERTIES_ASPECT_NAME = "displayProperties"; + // Config public static final String ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH = "opensearch"; public static final String ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH = "elasticsearch"; diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 88bbfa2e10c4c1..aab29101b30f71 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -71,6 +71,7 @@ dependencies { testImplementation project(':datahub-graphql-core') testImplementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow') testImplementation project(':metadata-service:auth-impl') + testImplementation project(':li-utils') testImplementation externalDependency.testng testImplementation externalDependency.h2 testImplementation externalDependency.mysqlConnector diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index 669ec751f87c69..bb9a5ad68c959b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -56,6 +56,25 @@ public Map> getLatestAspectObjects( } } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream() + .collect( + 
Collectors.toMap( + urn -> urn, + urn -> { + try { + return entityClient.exists(systemOperationContext, urn); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + })); + } + } + @Nonnull @Override public Map> getLatestSystemAspects( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java index 50cf8af30d606a..6ecf83b874dea0 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java @@ -54,6 +54,18 @@ public Map> getLatestAspectObjects( } } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream() + .collect( + Collectors.toMap( + urn -> urn, urn -> entityService.exists(systemOperationContext, urn))); + } + } + @Nonnull @Override public Map> getLatestSystemAspects( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java new file mode 100644 index 00000000000000..40553b338741f8 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java @@ -0,0 +1,70 @@ +package com.linkedin.metadata.entity.versioning; + +public class AlphanumericSortIdGenerator { + + private AlphanumericSortIdGenerator() {} + + private static final int STRING_LENGTH = 8; + private static final char[] ALLOWED_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); + + /** + * Increments an 8-character alphanumeric string. 
For example: "AAAAAAAA" -> "AAAAAAAB", "AAAAAAAZ" + * -> "AAAAAABA", and "ZZZZZZZZ" wraps around to "AAAAAAAA". + * + * @param currentId The current 8-character string, made up of the characters A-Z only + * @return The next string in sequence, wrapping around after the maximum value + * @throws IllegalArgumentException if input string is null, not exactly 8 characters long, or + * contains characters outside of A-Z + */ + public static String increment(String currentId) { + if (currentId == null || currentId.length() != STRING_LENGTH) { + throw new IllegalArgumentException("Input string must be exactly 8 characters long"); + } + + // Convert string to char array for manipulation + char[] currentIdChars = currentId.toCharArray(); + + // Validate input characters + for (char c : currentIdChars) { + if (getCharIndex(c) == -1) { + throw new IllegalArgumentException("Invalid character in input string: " + c); + } + } + + // Start from rightmost position + for (int i = STRING_LENGTH - 1; i >= 0; i--) { + int currentCharIndex = getCharIndex(currentIdChars[i]); + + // If current character is not the last allowed character, + // simply increment it and we're done + if (currentCharIndex < ALLOWED_CHARS.length - 1) { + currentIdChars[i] = ALLOWED_CHARS[currentCharIndex + 1]; + return new String(currentIdChars); + } + + // If we're here, we need to carry over to next position + currentIdChars[i] = ALLOWED_CHARS[0]; + + // If we're at the leftmost position and need to carry, + // we've reached maximum value and need to wrap around + if (i == 0) { + return "AAAAAAAA"; + } + } + + // Unreachable: the i == 0 iteration always returns + throw new RuntimeException("Unexpected error in increment operation"); + } + + /** + * Gets the index of a character in the ALLOWED_CHARS array. Returns -1 if character is not found.
+ */ + private static int getCharIndex(char c) { + for (int i = 0; i < ALLOWED_CHARS.length; i++) { + if (ALLOWED_CHARS[i] == c) { + return i; + } + } + return -1; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java new file mode 100644 index 00000000000000..48f5a00e9e8d5a --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java @@ -0,0 +1,356 @@ +package com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SORT_ID_FIELD_NAME; +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; + +import com.datahub.util.RecordUtils; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.MetadataAttribution; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.VersionTag; +import com.linkedin.common.urn.CorpuserUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.IngestResult; +import 
com.linkedin.metadata.entity.RollbackResult; +import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EntityVersioningServiceImpl implements EntityVersioningService { + + private final EntityService entityService; + + public EntityVersioningServiceImpl(EntityService entityService) { + this.entityService = entityService; + } + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. Create Version Properties for specified entity. If this aspect + * already exists will fail. 3. 
Generate Version Set Properties with latest set to the new version. + * Will eventually want to add in the scheme here as a parameter. + * + * @return the ingest results of the linking proposals; empty if already linked to this set + */ + @Override + public List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties) { + List proposals = new ArrayList<>(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); + String sortId; + Long versionSetConstraint; + Long versionPropertiesConstraint; + VersionSetKey versionSetKey = + (VersionSetKey) + EntityKeyUtils.convertUrnToEntityKey( + versionSet, opContext.getEntityRegistryContext().getKeyAspectSpec(versionSet)); + if (!versionSetKey.getEntityType().equals(newLatestVersion.getEntityType())) { + throw new IllegalArgumentException( + "Entity type must match Version Set's specified type: " + + versionSetKey.getEntityType() + + " invalid type: " + + newLatestVersion.getEntityType()); + } + if (!aspectRetriever.entityExists(ImmutableSet.of(versionSet)).get(versionSet)) { + MetadataChangeProposal versionSetKeyProposal = new MetadataChangeProposal(); + versionSetKeyProposal.setEntityUrn(versionSet); + versionSetKeyProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetKeyProposal.setAspectName(VERSION_SET_KEY_ASPECT_NAME); + versionSetKeyProposal.setAspect(GenericRecordUtils.serializeAspect(versionSetKey)); + versionSetKeyProposal.setChangeType(ChangeType.CREATE_ENTITY); + entityService.ingestProposal( + opContext, versionSetKeyProposal, opContext.getAuditStamp(), false); + + sortId = INITIAL_VERSION_SORT_ID; + versionSetConstraint = -1L; + versionPropertiesConstraint = -1L; + } else { + SystemAspect versionSetPropertiesAspect = + aspectRetriever.getLatestSystemAspect(versionSet, VERSION_SET_PROPERTIES_ASPECT_NAME); + VersionSetProperties versionSetProperties = + RecordUtils.toRecordTemplate( + VersionSetProperties.class,
versionSetPropertiesAspect.getRecordTemplate().data()); + versionSetConstraint = + versionSetPropertiesAspect + .getSystemMetadataVersion() + .orElse(versionSetPropertiesAspect.getVersion()); + SystemAspect latestVersion = + aspectRetriever.getLatestSystemAspect( + versionSetProperties.getLatest(), VERSION_PROPERTIES_ASPECT_NAME); + VersionProperties latestVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, latestVersion.getRecordTemplate().data()); + versionPropertiesConstraint = + latestVersion.getSystemMetadataVersion().orElse(latestVersion.getVersion()); + // When more impls for versioning scheme are set up, this will need to be resolved to the + // correct scheme generation strategy + sortId = AlphanumericSortIdGenerator.increment(latestVersionProperties.getSortId()); + } + + SystemAspect currentVersionPropertiesAspect = + aspectRetriever.getLatestSystemAspect(newLatestVersion, VERSION_PROPERTIES_ASPECT_NAME); + if (currentVersionPropertiesAspect != null) { + VersionProperties currentVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, currentVersionPropertiesAspect.getRecordTemplate().data()); + if (currentVersionProperties.getVersionSet().equals(versionSet)) { + return new ArrayList<>(); + } else { + throw new IllegalStateException( + String.format( + "Version already exists for specified entity: %s for a different Version Set: %s", + newLatestVersion, currentVersionProperties.getVersionSet())); + } + } + + VersionTag versionTag = new VersionTag(); + versionTag.setVersionTag(inputProperties.getVersion()); + MetadataAttribution metadataAttribution = new MetadataAttribution(); + metadataAttribution.setActor(opContext.getActorContext().getActorUrn()); + metadataAttribution.setTime(System.currentTimeMillis()); + versionTag.setMetadataAttribution(metadataAttribution); + VersionProperties versionProperties = + new VersionProperties() + .setVersionSet(versionSet) + .setComment(inputProperties.getComment(), 
SetMode.IGNORE_NULL) + .setVersion(versionTag) + .setMetadataCreatedTimestamp(opContext.getAuditStamp()) + .setSortId(sortId); + if (inputProperties.getSourceCreationTimestamp() != null) { + + AuditStamp sourceCreatedAuditStamp = + new AuditStamp().setTime(inputProperties.getSourceCreationTimestamp()); + Urn actor = null; + if (inputProperties.getSourceCreator() != null) { + actor = new CorpuserUrn(inputProperties.getSourceCreator()); + } + sourceCreatedAuditStamp.setActor(UrnUtils.getActorOrDefault(actor)); + + versionProperties.setSourceCreatedTimestamp(sourceCreatedAuditStamp); + } + MetadataChangeProposal versionPropertiesProposal = new MetadataChangeProposal(); + versionPropertiesProposal.setEntityUrn(newLatestVersion); + versionPropertiesProposal.setEntityType(newLatestVersion.getEntityType()); + versionPropertiesProposal.setAspectName(VERSION_PROPERTIES_ASPECT_NAME); + versionPropertiesProposal.setAspect(GenericRecordUtils.serializeAspect(versionProperties)); + versionPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap headerMap = new StringMap(); + headerMap.put(HTTP_HEADER_IF_VERSION_MATCH, versionPropertiesConstraint.toString()); + versionPropertiesProposal.setChangeType(ChangeType.UPSERT); // NOTE(review): repeats the call above; headerMap is never passed to setHeaders - confirm intent + proposals.add(versionPropertiesProposal); + + // Might want to refactor this to a Patch with create-if-not-exists logic if more + // properties get added + // to Version Set Properties. + VersionSetProperties versionSetProperties = + new VersionSetProperties() + .setVersioningScheme( + VersioningScheme + .ALPHANUMERIC_GENERATED_BY_DATAHUB) // Only one available, will need to add to + // input properties once more are added.
+ .setLatest(newLatestVersion); + MetadataChangeProposal versionSetPropertiesProposal = new MetadataChangeProposal(); + versionSetPropertiesProposal.setEntityUrn(versionSet); + versionSetPropertiesProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetPropertiesProposal.setAspectName(VERSION_SET_PROPERTIES_ASPECT_NAME); + versionSetPropertiesProposal.setAspect( + GenericRecordUtils.serializeAspect(versionSetProperties)); + versionSetPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap versionSetHeaderMap = new StringMap(); + versionSetHeaderMap.put(HTTP_HEADER_IF_VERSION_MATCH, versionSetConstraint.toString()); + versionSetPropertiesProposal.setHeaders(versionSetHeaderMap); + proposals.add(versionSetPropertiesProposal); + + return entityService.ingestProposal( + opContext, + AspectsBatchImpl.builder() + .mcps(proposals, opContext.getAuditStamp(), opContext.getRetrieverContext()) + .build(), + false); + } + + /** + * Unlinks a version from a version set. Will attempt to set up the previous version as the new + * latest. This fully removes the version properties and unversions the specified entity. 
+ * @param opContext operational context containing various information about the current execution + * @param versionSet the version set the entity is expected to be linked to + * @param linkedVersion the versioned entity urn to unlink, whether or not it is the latest + * @return the results of the deleted aspects; empty if the entity is not currently versioned + */ + @Override + public List unlinkVersion( + OperationContext opContext, Urn versionSet, Urn linkedVersion) { + List deletedAspects = new ArrayList<>(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); + SystemAspect linkedVersionPropertiesAspect = + aspectRetriever.getLatestSystemAspect(linkedVersion, VERSION_PROPERTIES_ASPECT_NAME); + // Not currently versioned, do nothing + if (linkedVersionPropertiesAspect == null) { + return deletedAspects; + } + VersionProperties linkedVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, linkedVersionPropertiesAspect.getRecordTemplate().data()); + Urn versionSetUrn = linkedVersionProperties.getVersionSet(); + if (!versionSet.equals(versionSetUrn)) { + throw new IllegalArgumentException( + String.format( + "Version is not linked to specified version set: %s but is linked to: %s", + versionSet, versionSetUrn)); + } + // Delete the version properties aspect of the entity being unlinked + entityService + .deleteAspect( + opContext, + linkedVersion.toString(), + VERSION_PROPERTIES_ASPECT_NAME, + Collections.emptyMap(), + true) + .ifPresent(deletedAspects::add); + + // Get Version Set details + VersionSetKey versionSetKey = + (VersionSetKey) + EntityKeyUtils.convertUrnToEntityKey( + versionSetUrn, + opContext.getEntityRegistryContext().getKeyAspectSpec(versionSetUrn)); + SearchRetriever searchRetriever = opContext.getRetrieverContext().getSearchRetriever(); + + // Find current latest version and previous + ScrollResult linkedVersions = + searchRetriever.scroll( + ImmutableList.of(versionSetKey.getEntityType()), + QueryUtils.newConjunctiveFilter( + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, versionSetUrn.toString())), + null, + 2, + ImmutableList.of( + new SortCriterion()
.setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), + SearchRetriever.RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS); + String updatedLatestVersionUrn = null; + + SearchEntityArray linkedEntities = linkedVersions.getEntities(); + SystemAspect versionSetPropertiesAspect = + aspectRetriever.getLatestSystemAspect(versionSetUrn, VERSION_SET_PROPERTIES_ASPECT_NAME); + if (versionSetPropertiesAspect == null) { + throw new IllegalStateException( + String.format( + "Version Set Properties must exist if entity version exists: %s", versionSetUrn)); + } + VersionSetProperties versionSetProperties = + RecordUtils.toRecordTemplate( + VersionSetProperties.class, versionSetPropertiesAspect.getRecordTemplate().data()); + long versionConstraint = + versionSetPropertiesAspect + .getSystemMetadataVersion() + .orElse(versionSetPropertiesAspect.getVersion()); + boolean isLatest = linkedVersion.equals(versionSetProperties.getLatest()); + + if (linkedEntities.size() == 2 && isLatest) { + // If the version to unlink is the same as the last search result and is currently the latest + // based on SQL, set to one immediately before. + // Otherwise set to most current one in search results assuming we have not gotten the index + // update for a recent update to latest. + // Does assume that there are not multiple index updates waiting in the queue so rapid fire + // updates intermixed with deletes should be avoided. + SearchEntity maybeLatestVersion = linkedEntities.get(0); + if (maybeLatestVersion.getEntity().equals(linkedVersion)) { + SearchEntity priorLatestVersion = linkedEntities.get(1); + updatedLatestVersionUrn = priorLatestVersion.getEntity().toString(); + } else { + updatedLatestVersionUrn = maybeLatestVersion.getEntity().toString(); + } + + } else if (linkedEntities.size() == 1 && isLatest) { + // Missing a version, if that version is not the one being unlinked then set as latest + // version. 
Same reasoning as above + SearchEntity maybePriorLatestVersion = linkedEntities.get(0); + if (!linkedVersion.equals(maybePriorLatestVersion.getEntity())) { + updatedLatestVersionUrn = maybePriorLatestVersion.getEntity().toString(); + } else { + // Delete Version Set if we are removing the last version + // TODO: Conditional deletes impl + only do the delete if version match + RollbackRunResult deleteResult = entityService.deleteUrn(opContext, versionSetUrn); + deletedAspects.addAll(deleteResult.getRollbackResults()); + } + } + + if (updatedLatestVersionUrn != null) { + + // Might want to refactor this to a Patch w/ Create if not exists logic if more properties + // get added + // to Version Set Properties + VersionSetProperties newVersionSetProperties = + new VersionSetProperties() + .setVersioningScheme( + VersioningScheme + .ALPHANUMERIC_GENERATED_BY_DATAHUB) // Only one available, will need to add + // to input properties once more are + // added. + .setLatest(UrnUtils.getUrn(updatedLatestVersionUrn)); + MetadataChangeProposal versionSetPropertiesProposal = new MetadataChangeProposal(); + versionSetPropertiesProposal.setEntityUrn(versionSetUrn); + versionSetPropertiesProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetPropertiesProposal.setAspectName(VERSION_SET_PROPERTIES_ASPECT_NAME); + versionSetPropertiesProposal.setAspect( + GenericRecordUtils.serializeAspect(newVersionSetProperties)); + versionSetPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap headerMap = new StringMap(); + headerMap.put(HTTP_HEADER_IF_VERSION_MATCH, Long.toString(versionConstraint)); + versionSetPropertiesProposal.setHeaders(headerMap); + entityService.ingestProposal( + opContext, + AspectsBatchImpl.builder() + .mcps( + ImmutableList.of(versionSetPropertiesProposal), + opContext.getAuditStamp(), + opContext.getRetrieverContext()) + .build(), + false); + } + + return deletedAspects; + } +} diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffect.java new file mode 100644 index 00000000000000..7e9692841c79ae --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffect.java @@ -0,0 +1,137 @@ +package com.linkedin.metadata.entity.versioning.sideeffects; + +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import com.linkedin.metadata.aspect.patch.PatchOperationType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.versionset.VersionSetProperties; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +/** + * Side effect that reacts to Version Set Properties changes by patching isLatest on the Version + * Properties aspect of the new latest entity (true) and of the previous latest entity (false).
+ */ +@Slf4j +@Getter +@Setter +@Accessors(chain = true) +public class VersionSetSideEffect extends MCPSideEffect { + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream applyMCPSideEffect( + Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); /* nothing is emitted from this hook */ + } + + @Override + protected Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return mclItems.stream().flatMap(item -> updateLatest(item, retrieverContext)); + } + + private static Stream updateLatest( + MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) { + + if (VERSION_SET_PROPERTIES_ASPECT_NAME.equals(mclItem.getAspectName())) { + List mcpItems = new ArrayList<>(); + VersionSetProperties versionSetProperties = mclItem.getAspect(VersionSetProperties.class); + if (versionSetProperties == null) { + log.error("Unable to process version set properties for urn: {}", mclItem.getUrn()); + return Stream.empty(); + } + // Set isLatest to false on the old latest and to true on the new latest. + // This side effect assumes the entity is already versioned; if it is not yet versioned it + // will fail due + // to not having set default values for the aspect. This creates an implicit ordering of when + // aspects should be + // updated: Version Properties first, then Version Set Properties.
+ Urn newLatest = versionSetProperties.getLatest(); + + VersionSetProperties previousVersionSetProperties = + mclItem.getPreviousAspect(VersionSetProperties.class); + if (previousVersionSetProperties != null) { /* no previous aspect means there is no old latest to demote */ + Urn previousLatest = previousVersionSetProperties.getLatest(); + if (!newLatest.equals(previousLatest) + && retrieverContext + .getAspectRetriever() + .entityExists(Collections.singleton(previousLatest)) + .getOrDefault(previousLatest, false)) { /* demote only when latest changed and the old entity still exists */ + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(previousLatest.getEntityType()); + GenericJsonPatch.PatchOp previousPatch = new GenericJsonPatch.PatchOp(); + previousPatch.setOp(PatchOperationType.ADD.getValue()); + previousPatch.setPath("/isLatest"); + previousPatch.setValue(false); + mcpItems.add( + PatchItemImpl.builder() + .urn(previousLatest) + .entitySpec(entitySpec) + .aspectName(VERSION_PROPERTIES_ASPECT_NAME) + .aspectSpec(entitySpec.getAspectSpec(VERSION_PROPERTIES_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .patch(List.of(previousPatch)) + .build() + .getJsonPatch()) + .auditStamp(mclItem.getAuditStamp()) + .systemMetadata(mclItem.getSystemMetadata()) + .build(retrieverContext.getAspectRetriever().getEntityRegistry())); + } + } + + // Explicitly error here to avoid downstream patch error with less context + if (retrieverContext + .getAspectRetriever() + .getLatestAspectObject(newLatest, VERSION_PROPERTIES_ASPECT_NAME) + == null) { + throw new UnsupportedOperationException( + "Cannot set latest version to unversioned entity: " + newLatest); + } + + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(newLatest.getEntityType()); + GenericJsonPatch.PatchOp currentPatch = new GenericJsonPatch.PatchOp(); + currentPatch.setOp(PatchOperationType.ADD.getValue()); + currentPatch.setPath("/isLatest"); + currentPatch.setValue(true); + mcpItems.add( + PatchItemImpl.builder() + .urn(newLatest)
+ .entitySpec(entitySpec) + .aspectName(VERSION_PROPERTIES_ASPECT_NAME) + .aspectSpec(entitySpec.getAspectSpec(VERSION_PROPERTIES_ASPECT_NAME)) + .patch(GenericJsonPatch.builder().patch(List.of(currentPatch)).build().getJsonPatch()) + .auditStamp(mclItem.getAuditStamp()) + .systemMetadata(mclItem.getSystemMetadata()) + .build(retrieverContext.getAspectRetriever().getEntityRegistry())); + return mcpItems.stream(); + } + return Stream.empty(); /* not a Version Set Properties change: nothing to do */ + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidator.java new file mode 100644 index 00000000000000..4d29cc254c1ba6 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidator.java @@ -0,0 +1,158 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; + +import com.datahub.util.RecordUtils; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import
com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang.StringUtils; + +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class VersionPropertiesValidator extends AspectPayloadValidator { /* validates Version Properties aspects: proposals must not set isLatest; pre-commit items must match their version set */ + + @Nonnull private AspectPluginConfig config; + + private static final Set SHOULD_VALIDATE_PROPOSED = + ImmutableSet.of(ChangeType.UPDATE, ChangeType.UPSERT, ChangeType.CREATE); /* change types whose proposed items are checked by validateProposedAspects */ + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return validatePropertiesProposals( + mcpItems.stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .filter(mcpItem -> SHOULD_VALIDATE_PROPOSED.contains(mcpItem.getChangeType())) + .collect(Collectors.toList())); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return validatePropertiesUpserts( + changeMCPs.stream() + .filter(changeMCP -> VERSION_PROPERTIES_ASPECT_NAME.equals(changeMCP.getAspectName())) + .collect(Collectors.toList()), + retrieverContext); + } + + @VisibleForTesting + public static Stream validatePropertiesUpserts( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + VersionProperties versionProperties =
mcpItem.getAspect(VersionProperties.class); + // Validate that the Version Set exists and holds this entity type + Urn versionSetUrn = versionProperties.getVersionSet(); + Map aspects = + retrieverContext + .getAspectRetriever() + .getLatestAspectObjects( + Collections.singleton(versionSetUrn), + ImmutableSet.of(VERSION_SET_KEY_ASPECT_NAME, VERSION_SET_PROPERTIES_ASPECT_NAME)) + .get(versionSetUrn); + if (aspects == null || aspects.isEmpty()) { + exceptions.addException(mcpItem, "Version Set specified does not exist: " + versionSetUrn); + continue; + } + Optional keyAspect = Optional.ofNullable(aspects.get(VERSION_SET_KEY_ASPECT_NAME)); + if (keyAspect.isPresent()) { + VersionSetKey versionSetKey = + RecordUtils.toRecordTemplate(VersionSetKey.class, keyAspect.get().data()); + if (!mcpItem.getEntitySpec().getName().equals(versionSetKey.getEntityType())) { + exceptions.addException( + mcpItem, + "Version Set specified entity type does not match, expected type: " + + versionSetKey.getEntityType()); + } + + // Validate sort ID scheme + String sortId = versionProperties.getSortId(); + Optional versionSetPropertiesAspect = + Optional.ofNullable(aspects.get(VERSION_SET_PROPERTIES_ASPECT_NAME)); + // Validate sort id matches scheme if version set properties exist + if (versionSetPropertiesAspect.isPresent()) { + VersionSetProperties versionSetProperties = + RecordUtils.toRecordTemplate( + VersionSetProperties.class, versionSetPropertiesAspect.get().data()); + VersioningScheme versioningScheme = versionSetProperties.getVersioningScheme(); + switch (versioningScheme) { + case ALPHANUMERIC_GENERATED_BY_DATAHUB: + validateDataHubGeneratedScheme(sortId, exceptions, mcpItem); + break; + default: + exceptions.addException(mcpItem, "Unsupported scheme type: " + versioningScheme); + } + } + } else { /* no key aspect was returned for the version set */ + exceptions.addException(mcpItem, "Version Set specified does not exist: " + versionSetUrn); + } + } + return exceptions.streamAllExceptions(); + } + + private static void validateDataHubGeneratedScheme( + String
sortId, ValidationExceptionCollection exceptions, BatchItem mcpItem) { /* a valid id is exactly 8 upper-case letters */ + if (!(sortId.length() == 8 + && StringUtils.isAllUpperCase(sortId) + && StringUtils.isAlpha(sortId))) { + exceptions.addException( + mcpItem, + "Invalid sortID for Versioning Scheme. ID: " + + sortId + + " Scheme: " + + VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + } + } + + @VisibleForTesting + public static Stream validatePropertiesProposals( + @Nonnull Collection mcpItems) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + if (mcpItem instanceof PatchItemImpl) { /* validateProposedAspects only passes UPDATE/UPSERT/CREATE items, so a patch here is unexpected */ + throw new IllegalStateException("Patch item must have change type of PATCH."); + } + VersionProperties versionProperties = mcpItem.getAspect(VersionProperties.class); + // Validate isLatest not set: it is a computed field, callers must not supply it + if (versionProperties.hasIsLatest()) { + exceptions.addException( + mcpItem, "IsLatest should not be specified, this is a computed field."); + } + } + return exceptions.streamAllExceptions(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidator.java new file mode 100644 index 00000000000000..8a7795f29ccfe0 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidator.java @@ -0,0 +1,80 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; + +import com.datahub.util.RecordUtils; +import com.google.common.annotations.VisibleForTesting; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
+import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.versionset.VersionSetProperties; +import java.util.Collection; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class VersionSetPropertiesValidator extends AspectPayloadValidator { /* rejects Version Set Properties proposals that change an already stored versioning scheme */ + + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return validatePropertiesUpserts( + mcpItems.stream() + .filter(i -> VERSION_SET_PROPERTIES_ASPECT_NAME.equals(i.getAspectName())) + .collect(Collectors.toList()), + retrieverContext); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); /* no pre-commit checks */ + } + + @VisibleForTesting + public static Stream validatePropertiesUpserts( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + VersionSetProperties versionSetProperties = mcpItem.getAspect(VersionSetProperties.class); + Optional aspect = + Optional.ofNullable( + retrieverContext + .getAspectRetriever() + .getLatestAspectObject(mcpItem.getUrn(), VERSION_SET_PROPERTIES_ASPECT_NAME)); + if (aspect.isPresent()) { /* only compared when properties are already stored for this urn */ + VersionSetProperties previousVersionSetProperties = + RecordUtils.toRecordTemplate(VersionSetProperties.class, aspect.get().data()); + if
(!previousVersionSetProperties + .getVersioningScheme() + .equals(versionSetProperties.getVersioningScheme())) { + exceptions.addException( + mcpItem, + "Versioning Scheme cannot change. Expected Scheme: " + + previousVersionSetProperties.getVersioningScheme() + + " Provided Scheme: " + + versionSetProperties.getVersioningScheme()); + } + } + } + return exceptions.streamAllExceptions(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java index 8d7548e0ba90a1..dae119beec4a7e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -6,7 +6,9 @@ import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; import java.util.List; +import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -16,15 +18,6 @@ @Getter @Builder public class SearchServiceSearchRetriever implements SearchRetriever { - private static final SearchFlags RETRIEVER_SEARCH_FLAGS = - new SearchFlags() - .setFulltext(false) - .setMaxAggValues(20) - .setSkipCache(false) - .setSkipAggregates(true) - .setSkipHighlighting(true) - .setIncludeSoftDeleted(false) - .setIncludeRestricted(false); @Setter private OperationContext systemOperationContext; private final SearchService searchService; @@ -34,16 +27,24 @@ public ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count) { - SortCriterion urnSort = new SortCriterion(); - urnSort.setField("urn"); - urnSort.setOrder(SortOrder.ASCENDING); + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags) { + List
finalCriteria = new ArrayList<>(sortCriteria); + if (sortCriteria.stream().noneMatch(sortCriterion -> "urn".equals(sortCriterion.getField()))) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + finalCriteria.add(urnSort); + } + final SearchFlags finalSearchFlags = + Optional.ofNullable(searchFlags).orElse(RETRIEVER_SEARCH_FLAGS); return searchService.scrollAcrossEntities( - systemOperationContext.withSearchFlags(flags -> RETRIEVER_SEARCH_FLAGS), + systemOperationContext.withSearchFlags(flags -> finalSearchFlags), entities, "*", filters, - List.of(urnSort), + finalCriteria, scrollId, null, count); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index b4ad847cb7afc2..7a60b89d0127cc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -147,7 +147,7 @@ private static Set objectFieldsFilter(Iterator fieldNames) { public Optional transformAspect( @Nonnull OperationContext opContext, final @Nonnull Urn urn, - final @Nonnull RecordTemplate aspect, + final @Nullable RecordTemplate aspect, final @Nonnull AspectSpec aspectSpec, final Boolean forDelete) throws RemoteInvocationException, URISyntaxException { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 17bbbaf059dec4..95fff81d13957c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -20,10 +20,12 @@ import com.linkedin.metadata.query.filter.Condition; import 
com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.utils.CriterionUtils; import io.datahubproject.metadata.context.OperationContext; import java.util.Collections; import java.util.HashMap; @@ -188,6 +190,13 @@ public static BoolQueryBuilder buildFilterQuery( }); finalQueryBuilder.should(andQueryBuilder); } + if (Boolean.TRUE.equals( + opContext.getSearchContext().getSearchFlags().isFilterNonLatestVersions())) { + BoolQueryBuilder filterNonLatestVersions = + ESUtils.buildFilterNonLatestEntities( + opContext, queryFilterRewriteChain, searchableFieldTypes); + finalQueryBuilder.must(filterNonLatestVersions); + } if (!finalQueryBuilder.should().isEmpty()) { finalQueryBuilder.minimumShouldMatch(1); } @@ -869,4 +878,31 @@ private static void filterSoftDeletedByDefault( } } } + + public static BoolQueryBuilder buildFilterNonLatestEntities( + OperationContext opContext, + QueryFilterRewriteChain queryFilterRewriteChain, + Map> searchableFieldTypes) { + ConjunctiveCriterion isLatestCriterion = new ConjunctiveCriterion(); + CriterionArray isLatestCriterionArray = new CriterionArray(); + isLatestCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EQUAL, "true")); /* clause 1: isLatest equals "true" */ + isLatestCriterion.setAnd(isLatestCriterionArray); + BoolQueryBuilder isLatest = + ESUtils.buildConjunctiveFilterQuery( + isLatestCriterion, false, searchableFieldTypes, opContext, queryFilterRewriteChain); + ConjunctiveCriterion isNotVersionedCriterion = new ConjunctiveCriterion(); + CriterionArray isNotVersionedCriterionArray = new CriterionArray(); +
isNotVersionedCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EXISTS, true)); /* clause 2: presumably the trailing boolean negates EXISTS, i.e. "no isLatest field" - confirm against CriterionUtils.buildCriterion */ + isNotVersionedCriterion.setAnd(isNotVersionedCriterionArray); + BoolQueryBuilder isNotVersioned = + ESUtils.buildConjunctiveFilterQuery( + isNotVersionedCriterion, + false, + searchableFieldTypes, + opContext, + queryFilterRewriteChain); + return QueryBuilders.boolQuery().should(isLatest).should(isNotVersioned).minimumShouldMatch(1); /* a document passes when either clause matches */ + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index c5fc9ebdac9fa6..635d4472305c93 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -42,6 +42,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -399,7 +400,7 @@ private void deleteSearchData( Urn urn, String entityName, AspectSpec aspectSpec, - RecordTemplate aspect, + @Nullable RecordTemplate aspect, Boolean isKeyAspect) { String docId; try { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java new file mode 100644 index 00000000000000..8021507231d3b9 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java @@ -0,0 +1,62 @@ +package com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static org.testng.Assert.*; + +import org.testng.annotations.Test; + +public class AlphanumericSortIdGeneratorTest { + + @Test + public void testBasicIncrement() { +
assertEquals(AlphanumericSortIdGenerator.increment(INITIAL_VERSION_SORT_ID), "AAAAAAAB"); + assertEquals(AlphanumericSortIdGenerator.increment("AAAAAAAB"), "AAAAAAAC"); + } + + @Test + public void testCarryOver() { + assertEquals(AlphanumericSortIdGenerator.increment("AAAAAAAZ"), "AAAAAABA"); + assertEquals(AlphanumericSortIdGenerator.increment("AAAAAZZZ"), "AAAABAAA"); + } + + @Test + public void testWrapAround() { + assertEquals(AlphanumericSortIdGenerator.increment("ZZZZZZZZ"), INITIAL_VERSION_SORT_ID); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testInvalidLength() { + AlphanumericSortIdGenerator.increment("AAA"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testInvalidCharacters() { + AlphanumericSortIdGenerator.increment("AAAA$AAA"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNullInput() { + AlphanumericSortIdGenerator.increment(null); + } + + @Test + public void testSequence() { + String id = "AAAAAAAA"; + id = AlphanumericSortIdGenerator.increment(id); + assertEquals(id, "AAAAAAAB"); + id = AlphanumericSortIdGenerator.increment(id); + assertEquals(id, "AAAAAAAC"); + id = AlphanumericSortIdGenerator.increment(id); + assertEquals(id, "AAAAAAAD"); + } + + @Test + public void testLowerBoundary() { + assertEquals(AlphanumericSortIdGenerator.increment(INITIAL_VERSION_SORT_ID), "AAAAAAAB"); + } + + @Test + public void testUpperBoundary() { + assertEquals(AlphanumericSortIdGenerator.increment("ZZZZZZZZ"), "AAAAAAAA"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java new file mode 100644 index 00000000000000..8c4d81af129428 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java @@ -0,0 +1,603 @@ +package 
com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.FabricType; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.VersionTag; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceAspectRetriever; +import com.linkedin.metadata.entity.RollbackResult; +import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.TestEntityRegistry; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; +import com.linkedin.metadata.models.registry.MergedEntityRegistry; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.snapshot.Snapshot; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import 
io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RetrieverContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +/** Unit tests for EntityVersioningServiceImpl.linkLatestVersion and unlinkVersion, run against a mocked EntityService, mocked aspect retrievers and a mocked SearchRetriever. */ public class EntityVersioningServiceTest { + + private EntityVersioningServiceImpl versioningService; + private EntityService mockEntityService; + private OperationContext mockOpContext; + private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockCachingAspectRetriever; + private SearchRetriever mockSearchRetriever; + private static Urn TEST_VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + private static Urn TEST_DATASET_URN = + new DatasetUrn(new DataPlatformUrn("kafka"), "myDataset", FabricType.PROD); + private static Urn TEST_DATASET_URN_2 = + new DatasetUrn(new DataPlatformUrn("hive"), "myHiveDataset", FabricType.PROD); + private static Urn TEST_DATASET_URN_3 = + new DatasetUrn(new DataPlatformUrn("hive"), "myHiveDataset2", FabricType.PROD); + + /** Rebuilds all mocks, a merged entity registry (TestEntityRegistry plus entity-registry.yml), the system OperationContext and the service under test before every test. */ @BeforeMethod + public void setup() throws EntityRegistryException { + mockEntityService = mock(EntityService.class); + final EntityRegistry snapshotEntityRegistry = new TestEntityRegistry(); + final EntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + Snapshot.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + final EntityRegistry testEntityRegistry = + new MergedEntityRegistry(snapshotEntityRegistry).apply(configEntityRegistry); + mockAspectRetriever = mock(EntityServiceAspectRetriever.class); + mockCachingAspectRetriever = mock(CachingAspectRetriever.class); + mockSearchRetriever = mock(SearchRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(testEntityRegistry); +
mockOpContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> testEntityRegistry, + () -> + RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(mockSearchRetriever) + .cachingAspectRetriever(mockCachingAspectRetriever) + .build(), + null, + opContext -> + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) + .setSystemOperationContext(opContext), + null); + versioningService = new EntityVersioningServiceImpl(mockEntityService); + } + + /** Linking into a version set that entityExists reports as missing must emit VersionProperties with INITIAL_VERSION_SORT_ID and VersionSetProperties whose latest is the linked dataset. */ @Test + public void testLinkLatestVersionNewVersionSet() throws Exception { + + VersionPropertiesInput input = + new VersionPropertiesInput("Test comment", "Test label", 123456789L, "testCreator"); + // Mock version set doesn't exist + when(mockAspectRetriever.entityExists(anySet())) + .thenReturn(Map.of(TEST_VERSION_SET_URN, false)); + + // Capture the proposals + ArgumentCaptor aspectsCaptor = ArgumentCaptor.forClass(AspectsBatch.class); + when(mockEntityService.ingestProposal(eq(mockOpContext), aspectsCaptor.capture(), eq(false))) + .thenReturn(List.of()); + + // Execute + versioningService.linkLatestVersion( + mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN, input); + + // Verify + List capturedAspects = aspectsCaptor.getAllValues(); + List versionPropertiesAspect = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionProperties.class)) + .collect(Collectors.toList()); + + // Verify VersionProperties has initial sort ID + VersionProperties versionProps = + (VersionProperties) + versionPropertiesAspect.stream() + .filter(a -> a instanceof VersionProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("VersionProperties not found")); + + assertEquals(versionProps.getSortId(), INITIAL_VERSION_SORT_ID); + assertEquals(versionProps.getComment(), "Test comment"); +
assertEquals(versionProps.getVersionSet(), TEST_VERSION_SET_URN); + + List versionSetPropertiesAspect = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_SET_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionSetProperties.class)) + .collect(Collectors.toList()); + VersionSetProperties versionSetProperties = + (VersionSetProperties) + versionSetPropertiesAspect.stream() + .filter(aspect -> aspect instanceof VersionSetProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("Version Set Properties not found")); + assertEquals(versionSetProperties.getLatest(), TEST_DATASET_URN); + assertEquals( + versionSetProperties.getVersioningScheme(), + VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + } + + /** Linking TEST_DATASET_URN_2 into an existing set whose latest carries sort id AAAAAAAA must emit VersionProperties with the next sort id, AAAAAAAB. */ @Test + public void testLinkLatestVersionExistingVersionSet() throws Exception { + + VersionPropertiesInput input = + new VersionPropertiesInput("Test comment", "Label2", 123456789L, "testCreator"); + + // Mock version set exists + when(mockAspectRetriever.entityExists(anySet())).thenReturn(Map.of(TEST_VERSION_SET_URN, true)); + + // Mock existing version set properties + VersionSetProperties existingVersionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropertiesAspect = mock(SystemAspect.class); + when(mockVersionSetPropertiesAspect.getRecordTemplate()).thenReturn(existingVersionSetProps); + when(mockVersionSetPropertiesAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect(eq(TEST_VERSION_SET_URN), anyString())) + .thenReturn(mockVersionSetPropertiesAspect); + + // Mock existing version properties with a sort ID + VersionProperties existingVersionProps = + new VersionProperties() + .setSortId("AAAAAAAA") + .setVersion(new VersionTag().setVersionTag("Label1")) + .setVersionSet(TEST_VERSION_SET_URN); +
SystemAspect mockVersionPropertiesAspect = mock(SystemAspect.class); + when(mockVersionPropertiesAspect.getRecordTemplate()).thenReturn(existingVersionProps); + when(mockVersionPropertiesAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect(eq(TEST_DATASET_URN), anyString())) + .thenReturn(mockVersionPropertiesAspect); + + // Capture the proposals + ArgumentCaptor aspectsCaptor = ArgumentCaptor.forClass(AspectsBatch.class); + when(mockEntityService.ingestProposal(eq(mockOpContext), aspectsCaptor.capture(), eq(false))) + .thenReturn(List.of()); + + // Execute + versioningService.linkLatestVersion( + mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2, input); + + // Verify + List capturedAspects = aspectsCaptor.getAllValues(); + List aspects = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionProperties.class)) + .collect(Collectors.toList()); + + // Verify VersionProperties has incremented sort ID + VersionProperties versionProps = + (VersionProperties) + aspects.stream() + .filter(a -> a instanceof VersionProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("VersionProperties not found")); + + assertEquals(versionProps.getSortId(), "AAAAAAAB"); + assertEquals(versionProps.getComment(), "Test comment"); + assertEquals(versionProps.getVersionSet(), TEST_VERSION_SET_URN); + } + + /** Unlinking the entity whose sort id is INITIAL_VERSION_SORT_ID must return two results, delete the version set urn and the VERSION_PROPERTIES_ASPECT_NAME aspect, and never call scroll (the scroll stub set up below goes unused). */ @Test + public void testUnlinkInitialVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId(INITIAL_VERSION_SORT_ID); + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); +
when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock delete aspect responses + RollbackResult versionSetDeleteResult = + new RollbackResult( + TEST_VERSION_SET_URN, + "versionSet", + VERSION_SET_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + RollbackRunResult rollbackRunResult = + new RollbackRunResult(new ArrayList<>(), 1, List.of(versionSetDeleteResult)); + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + + when(mockEntityService.deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN))) + .thenReturn(rollbackRunResult); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Mock search retriever scroll response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Execute + List results
= + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertEquals(results.size(), 2); + verify(mockEntityService).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); + } + + /** Unlinking the current latest (sort id AAAAAAAB) while scroll returns two entities must return one result, delete the aspect, call ingestProposal once and never delete the version set urn. */ @Test + public void testUnlinkLatestVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock search retriever scroll response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult
= + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + /** Unlinking TEST_DATASET_URN_2 while the set latest is TEST_DATASET_URN must return one result, delete that aspect and never delete the version set urn. */ @Test + public void testUnlinkNotLatestVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_2), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect =
mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock search retriever scroll response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_2, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + /** Unlinking the latest (TEST_DATASET_URN_2) when scroll returns a single entity, TEST_DATASET_URN, must return one result, call ingestProposal once and never delete the version set urn. */ @Test + public void testUnlinkNotReturnedSingleVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version +
SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_2), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN_2); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock search retriever scroll response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_2, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( +
eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + /** Unlinking the latest (TEST_DATASET_URN_3) when scroll returns two other entities must return one result, call ingestProposal once and never delete the version set urn. */ @Test + public void testUnlinkNotReturnedDoubleVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_3), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN_3); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock search retriever scroll response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) +
.thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_3, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_3); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_3.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + /** When the retriever returns null for the VERSION_PROPERTIES_ASPECT_NAME aspect, unlinkVersion must return an empty list without deleting anything or calling scroll. */ @Test + public void testUnlinkNonVersionedEntity() throws Exception { + + // Mock no version properties aspect + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(null); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertTrue(results.isEmpty()); + verify(mockEntityService, never()).deleteAspect(any(), any(), any(), any(), anyBoolean()); + verify(mockEntityService, never()).deleteUrn(any(), any()); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java new file mode 100644 index 00000000000000..35445efaedc607 --- /dev/null +++
b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java @@ -0,0 +1,229 @@ +package com.linkedin.metadata.entity.versioning.sideeffects; + +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static org.mockito.Mockito.mock; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import io.datahubproject.metadata.context.RetrieverContext; +import jakarta.json.JsonObject; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +/** Unit tests for VersionSetSideEffect.postMCPSideEffect: checks the isLatest patches produced for VERSION_SET_PROPERTIES_ASPECT_NAME changes. */ public class VersionSetSideEffectTest { + private static final TestEntityRegistry
TEST_REGISTRY = new TestEntityRegistry(); + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + private static final Urn PREVIOUS_LATEST_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); + private static final Urn NEW_LATEST_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDatasetV2,PROD)"); + + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(VersionSetSideEffect.class.getName()) + .enabled(true) + .supportedOperations( + List.of("CREATE", "PATCH", "CREATE_ENTITY", "UPSERT", "DELETE", "RESTATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entityName(VERSION_SET_ENTITY_NAME) + .build())) + .build(); + + private MockAspectRetriever mockAspectRetriever; + private RetrieverContext retrieverContext; + private VersionSetSideEffect sideEffect; + + /** Seeds a MockAspectRetriever with VersionProperties for both datasets (only PREVIOUS_LATEST_URN has isLatest true), then builds the RetrieverContext and the configured side effect. */ @BeforeMethod + public void setup() { + GraphRetriever graphRetriever = mock(GraphRetriever.class); + VersionProperties existingProperties = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setIsLatest(false) + .setSortId("AAAAAAAA"); + VersionProperties previousLatestProperties = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setIsLatest(true) + .setSortId("AAAAAAAB"); + Map> data = new HashMap<>(); + data.put(NEW_LATEST_URN, Collections.singletonList(existingProperties)); + data.put(PREVIOUS_LATEST_URN, Collections.singletonList(previousLatestProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + mockAspectRetriever.setEntityRegistry(TEST_REGISTRY); + + retrieverContext = + RetrieverContext.builder() + .searchRetriever(mock(SearchRetriever.class)) + .aspectRetriever(mockAspectRetriever) + .graphRetriever(graphRetriever) + .build(); + + sideEffect = new VersionSetSideEffect(); +
sideEffect.setConfig(TEST_PLUGIN_CONFIG); + } + + /** Changing latest from PREVIOUS_LATEST_URN to NEW_LATEST_URN must yield two patches in order: isLatest false on the previous latest, then isLatest true on the new one. */ @Test + public void testUpdateLatestVersion() { + // Create previous version set properties with different latest + VersionSetProperties previousProperties = new VersionSetProperties(); + previousProperties.setLatest(PREVIOUS_LATEST_URN); + previousProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Create new version set properties + VersionSetProperties newProperties = new VersionSetProperties(); + newProperties.setLatest(NEW_LATEST_URN); + newProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(VERSION_SET_ENTITY_NAME); + + // Create change item + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(TEST_VERSION_SET_URN) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(VERSION_SET_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + // Create MCL item with previous aspect + MCLItemImpl mclItem = + MCLItemImpl.builder() + .previousRecordTemplate(previousProperties) + .build(changeItem, previousProperties, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify results + assertEquals(sideEffectResults.size(), 2, "Expected two patch operations"); + + // Verify patch for previous latest version + MCPItem previousPatch = sideEffectResults.get(0); + assertEquals(previousPatch.getUrn(), PREVIOUS_LATEST_URN); + JsonObject previousPatchOp = + ((PatchItemImpl) previousPatch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(previousPatchOp.getString("op"), "add"); + assertEquals(previousPatchOp.getString("path"), "/isLatest"); +
assertEquals(previousPatchOp.getBoolean("value"), false); + + // Verify patch for new latest version + MCPItem newPatch = sideEffectResults.get(1); + assertEquals(newPatch.getUrn(), NEW_LATEST_URN); + JsonObject newPatchOp = ((PatchItemImpl) newPatch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(newPatchOp.getString("op"), "add"); + assertEquals(newPatchOp.getString("path"), "/isLatest"); + assertEquals(newPatchOp.getBoolean("value"), true); + } + + /** Despite the method name, an unchanged latest is still expected to yield exactly one patch, setting isLatest true on NEW_LATEST_URN. */ @Test + public void testNoChangesWhenLatestRemainsSame() { + // Create version set properties with same latest + VersionSetProperties previousProperties = new VersionSetProperties(); + previousProperties.setLatest(NEW_LATEST_URN); + previousProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + VersionSetProperties newProperties = new VersionSetProperties(); + newProperties.setLatest(NEW_LATEST_URN); + newProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(VERSION_SET_ENTITY_NAME); + + // Create change item + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(TEST_VERSION_SET_URN) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(VERSION_SET_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + // Create MCL item with previous aspect + MCLItemImpl mclItem = + MCLItemImpl.builder() + .previousRecordTemplate(previousProperties) + .build(changeItem, null, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify results - should still get one patch to set isLatest=true on current latest + assertEquals(sideEffectResults.size(), 1, "Expected one patch
operation"); + + // Verify patch operation + MCPItem patch = sideEffectResults.get(0); + assertEquals(patch.getUrn(), NEW_LATEST_URN); + JsonObject patchOp = ((PatchItemImpl) patch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(patchOp.getString("op"), "add"); + assertEquals(patchOp.getString("path"), "/isLatest"); + assertEquals(patchOp.getBoolean("value"), true); + } + + /** A GLOBAL_TAGS_ASPECT_NAME change on a dataset entity must produce no side effect items. */ @Test + public void testNoChangesForNonVersionSetProperties() { + // Create some other type of aspect change + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(DATASET_ENTITY_NAME); + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(PREVIOUS_LATEST_URN) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .recordTemplate(new GlobalTags().setTags(new TagAssociationArray())) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + MCLItemImpl mclItem = + MCLItemImpl.builder().build(changeItem, null, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify no changes for non-version set properties aspects + assertEquals( + sideEffectResults.size(), 0, "Expected no changes for non-version set properties aspect"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java new file mode 100644 index 00000000000000..4afd05c98a9312 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java @@ -0,0 +1,165 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import static com.linkedin.metadata.Constants.CHART_ENTITY_NAME; +
+import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +/** Unit tests for the static VersionPropertiesValidator checks, validatePropertiesUpserts and validatePropertiesProposals. */ public class VersionPropertiesValidatorTest { + + private static final String ENTITY_TYPE = "dataset"; + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(12356,dataset)"); + private static final Urn TEST_ENTITY_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); + + private SearchRetriever mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + /** Seeds a MockAspectRetriever with a dataset-typed VersionSetKey and VersionSetProperties for TEST_VERSION_SET_URN and builds the RetrieverContext. */ @BeforeMethod + public void setup() { + mockSearchRetriever = Mockito.mock(SearchRetriever.class); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + + // Create version set key and properties + VersionSetKey versionSetKey = new VersionSetKey(); + versionSetKey.setEntityType(ENTITY_TYPE); + + VersionSetProperties versionSetProperties = new VersionSetProperties(); +
versionSetProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Initialize mock aspect retriever with version set data + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Arrays.asList(versionSetKey, versionSetProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + /** An 8-letter uppercase sort id against the seeded version set must produce no validation exceptions. */ @Test + public void testValidVersionProperties() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); // Valid 8-char uppercase alpha + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + /** Sort id 123 must be rejected with an invalid sortID message. */ @Test + public void testInvalidSortId() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("123"); // Invalid - not 8 chars, not alpha + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Invalid sortID for Versioning Scheme")); + } + + /** A version set urn that was never seeded into the retriever must be rejected as non-existent. */ @Test + public void testNonexistentVersionSet() { + Urn nonexistentUrn = UrnUtils.getUrn("urn:li:versionSet:(nonexistent,dataset)"); + + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(nonexistentUrn); + properties.setSortId("ABCDEFGH"); + + Stream validationResult = +
VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Version Set specified does not exist")); + } + + /** A version set whose key has entity type CHART_ENTITY_NAME must be rejected for a dataset urn as an entity type mismatch. */ @Test + public void testEntityTypeMismatch() { + // Create version set with different entity type + VersionSetKey wrongTypeKey = new VersionSetKey(); + wrongTypeKey.setEntityType(CHART_ENTITY_NAME); + + VersionSetProperties versionSetProperties = new VersionSetProperties(); + versionSetProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Arrays.asList(wrongTypeKey, versionSetProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue( + exception.getMessage().contains("Version Set specified entity type does not match")); + } + + /** Setting isLatest on a proposal must be rejected by validatePropertiesProposals. */ @Test + public void testIsLatestFieldSpecified() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); + properties.setIsLatest(true); // Should not be specified + + Stream validationResult = +
VersionPropertiesValidator.validatePropertiesProposals( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry())); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("IsLatest should not be specified")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java new file mode 100644 index 00000000000000..c91495271f6149 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java @@ -0,0 +1,139 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionSetPropertiesValidatorTest { + + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + + private SearchRetriever
mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockSearchRetriever = Mockito.mock(SearchRetriever.class); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + + Map> emptyData = new HashMap<>(); + mockAspectRetriever = new MockAspectRetriever(emptyData); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + @Test + public void testValidUpsertWithNoExistingProperties() { + // Create version set properties + VersionSetProperties properties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Test validation with no existing properties + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, properties, new TestEntityRegistry()), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testValidUpsertWithSameVersioningScheme() { + // Create existing properties with semantic versioning + VersionSetProperties existingProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Set up mock retriever with existing properties + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Collections.singletonList(existingProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + // Create new properties with same versioning 
scheme + VersionSetProperties newProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Test validation + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, newProperties, new TestEntityRegistry()), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testInvalidUpsertWithDifferentVersioningScheme() { + // Create existing properties with semantic versioning + VersionSetProperties existingProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Set up mock retriever with existing properties + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Collections.singletonList(existingProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + // Create new properties with different versioning scheme + VersionSetProperties newProperties = + new VersionSetProperties().setVersioningScheme(VersioningScheme.$UNKNOWN); + + // Test validation + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, newProperties, new TestEntityRegistry()), + retrieverContext); + + // Assert validation exception exists + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Versioning Scheme cannot change")); + Assert.assertTrue( + exception.getMessage().contains("Expected Scheme: ALPHANUMERIC_GENERATED_BY_DATAHUB")); + 
Assert.assertTrue(exception.getMessage().contains("Provided Scheme: $UNKNOWN")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index c5205906e9d373..23d493b7287f78 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -1,9 +1,12 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -13,22 +16,35 @@ import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import 
com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.opensearch.action.search.SearchRequest; import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.MultiMatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import org.opensearch.search.builder.SearchSourceBuilder; @@ -40,6 +56,8 @@ public class AutocompleteRequestHandlerTest { private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + private OperationContext nonMockOpContext = + TestOperationContexts.systemContextNoSearchAuthorization(); static { testQueryConfig = new SearchConfiguration(); @@ -465,10 +483,148 @@ public void testCustomConfigWithFunctionScores() { assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.size() == 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + // bool -> filter -> [bool] -> must -> [bool] + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private static QueryBuilder extractNestedQuery(BoolQueryBuilder nested) { assertEquals(nested.should().size(), 1); BoolQueryBuilder firstLevel = (BoolQueryBuilder) nested.should().get(0); assertEquals(firstLevel.should().size(), 1); return firstLevel.should().get(0); } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + AutocompleteRequestHandler requestHandler = + AutocompleteRequestHandler.getBuilder( + entitySpec, + CustomSearchConfiguration.builder().build(), + 
QueryFilterRewriteChain.EMPTY, + testQueryConfig); + + return (BoolQueryBuilder) + ((FunctionScoreQueryBuilder) + requestHandler + .getSearchRequest( + mockOpContext.withSearchFlags( + flags -> + flags + .setFulltext(false) + .setFilterNonLatestVersions(filterNonLatest)), + "", + "platform", + filter, + 3) + .source() + .query()) + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1a91ae35c6595b..1fea4476d75abb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; @@ -56,6 +57,8 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; @@ -805,7 +808,214 @@ public void testQueryByDefault() { } } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + 
filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + + @Test + public void testAggregationFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof 
TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testAggregationNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + 
.filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private BoolQueryBuilder getQuery(final Criterion filterCriterion) { + return getQuery(filterCriterion, TestEntitySpecBuilder.getSpec(), true); + } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { final Filter filter = new Filter() .setOr( @@ -816,7 +1026,7 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { final SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder( operationContext.getEntityRegistry(), - TestEntitySpecBuilder.getSpec(), + entitySpec, testQueryConfig, null, QueryFilterRewriteChain.EMPTY); @@ -824,7 +1034,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { return (BoolQueryBuilder) requestHandler .getSearchRequest( - operationContext.withSearchFlags(flags -> flags.setFulltext(false)), + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), "", filter, null, @@ -834,4 +1045,33 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .source() .query(); } + + private BoolQueryBuilder getAggregationQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + final SearchRequestHandler requestHandler = + SearchRequestHandler.getBuilder( + operationContext.getEntityRegistry(), + entitySpec, + testQueryConfig, + null, + 
QueryFilterRewriteChain.EMPTY); + + return (BoolQueryBuilder) + requestHandler + .getAggregationRequest( + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), + "platform", + filter, + 10) + .source() + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 65b73b7425b743..5a4fb39bd50e96 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -430,4 +430,23 @@ public void testEmptyDescription() throws RemoteInvocationException, URISyntaxEx assertTrue(transformed.get().get("description").isNull()); assertFalse(transformed.get().get("hasDescription").asBoolean()); } + + @Test + public void testHandleRemoveFieldsWithStructuredProperties() throws IOException { + ObjectNode previousDoc = JsonNodeFactory.instance.objectNode(); + previousDoc.put("structuredProperties.prop1", "value1"); + previousDoc.put("structuredProperties.prop2", "value2"); + previousDoc.put("otherField", "value3"); + + ObjectNode newDoc = JsonNodeFactory.instance.objectNode(); + newDoc.put("structuredProperties.prop1", "updatedValue1"); + newDoc.put("otherField", "updatedValue3"); + + ObjectNode result = SearchDocumentTransformer.handleRemoveFields(newDoc, previousDoc); + + assertEquals(result.get("structuredProperties.prop1").asText(), "updatedValue1"); + assertTrue(result.has("structuredProperties.prop2")); + assertTrue(result.get("structuredProperties.prop2").isNull()); + assertEquals(result.get("otherField").asText(), "updatedValue3"); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl 
b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl new file mode 100644 index 00000000000000..af4d48debe0217 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl @@ -0,0 +1,77 @@ +namespace com.linkedin.common + +/** + * Properties about a versioned asset i.e. dataset, ML Model, etc. + */ +@Aspect = { + "name": "versionProperties" +} +record VersionProperties { + /** + * The linked Version Set entity that ties multiple versioned assets together + */ + @Searchable = { + "queryByDefault": false + } + @Relationship = { + "name": "VersionOf", + "entityTypes": [ "versionSet" ] + } + versionSet: Urn + + /** + * Label for this versioned asset, is unique within a version set + */ + @Searchable = { + "/versionTag": { + "fieldName": "version", + "queryByDefault": false + } + } + version: VersionTag + + /** + * Associated aliases for this versioned asset + */ + @Searchable = { + "/*/versionTag": { + "fieldName": "aliases", + "queryByDefault": false + } + } + aliases: array[VersionTag] = [] + + /** + * Comment documenting what this version was created for, changes, or represents + */ + comment: optional string + + /** + * Sort identifier that determines where a version lives in the order of the Version Set. + * What this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation. + */ + @Searchable = { + "queryByDefault": false, + "fieldName": "versionSortId" + } + sortId: string + + /** + * Timestamp reflecting when this asset version was created in the source system. + */ + sourceCreatedTimestamp: optional AuditStamp + + /** + * Timestamp reflecting when the metadata for this version was created in DataHub + */ + metadataCreatedTimestamp: optional AuditStamp + + /** + * Marks whether this version is currently the latest. Set by a side effect and should not be modified by API. 
+ */ + @Searchable = { + "queryByDefault": false, + "fieldType": "BOOLEAN" + } + isLatest: optional boolean +} \ No newline at end of file diff --git a/li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl similarity index 78% rename from li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl rename to metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl index f26a1b0140b793..82f2193747c5d3 100644 --- a/li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl @@ -5,4 +5,5 @@ namespace com.linkedin.common */ record VersionTag { versionTag: optional string + metadataAttribution: optional MetadataAttribution } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl new file mode 100644 index 00000000000000..edbddc29e2023f --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl @@ -0,0 +1,20 @@ +namespace com.linkedin.metadata.key + + +/** + * Key for a Version Set entity + */ +@Aspect = { + "name": "versionSetKey" +} +record VersionSetKey { + /** + * ID of the Version Set, generated from platform + asset id / name + */ + id: string + + /** + * Type of entities included in version set, limits to a single entity type between linked versioned entities + */ + entityType: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index a3a7a8cda58a8d..ab5873452641ed 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -64,4 +64,9 @@ record SearchFlags { * 
By default we include these, but custom aggregation requests don't need them. */ includeDefaultFacets: optional boolean = true + + /** + * Include only latest versions in version sets, default true + */ + filterNonLatestVersions: optional boolean = true } diff --git a/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl new file mode 100644 index 00000000000000..0e50c33aa2b7d4 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl @@ -0,0 +1,24 @@ +namespace com.linkedin.versionset + +import com.linkedin.common.CustomProperties +import com.linkedin.common.Urn + +@Aspect = { + "name": "versionSetProperties" +} +record VersionSetProperties includes CustomProperties { + /** + * The latest versioned entity linked to in this version set + */ + @Searchable = { + "queryByDefault": "false" + } + latest: Urn + + /** + * What versioning scheme is being utilized for the versioned entities sort criterion. Static once set + */ + versioningScheme: enum VersioningScheme { + ALPHANUMERIC_GENERATED_BY_DATAHUB + } +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 1556b72e4aefb1..32f9d1b98db5df 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -46,6 +46,7 @@ entities: - structuredProperties - forms - partitionsSummary + - versionProperties - name: dataHubPolicy doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc. 
category: internal @@ -365,6 +366,7 @@ entities: - structuredProperties - forms - testResults + - versionProperties - name: mlModelGroup category: core keyAspect: mlModelGroupKey @@ -494,6 +496,11 @@ entities: keyAspect: globalSettingsKey aspects: - globalSettingsInfo + - name: versionSet + category: core + keyAspect: versionSetKey + aspects: + - versionSetProperties - name: incident doc: An incident for an asset. category: core diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java index 97ca0dcabea9f3..eeb90d09204bb9 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java @@ -26,4 +26,5 @@ public class FeatureFlags { private boolean alternateMCPValidation = false; private boolean showManageStructuredProperties = false; private boolean dataProcessInstanceEntityEnabled = true; + private boolean entityVersioning = false; } diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index c029cb4648d012..69b86962442b91 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -466,6 +466,7 @@ featureFlags: showSeparateSiblings: ${SHOW_SEPARATE_SIBLINGS:false} # If turned on, all siblings will be separated with no way to get to a "combined" sibling view editableDatasetNameEnabled: ${EDITABLE_DATASET_NAME_ENABLED:false} # Enables the ability to edit the dataset name in the UI showManageStructuredProperties: ${SHOW_MANAGE_STRUCTURED_PROPERTIES:true} # If turned on, show the manage structured properties button on the govern dropdown + 
entityVersioning: ${ENTITY_VERSIONING_ENABLED:false} # Enables entity versioning APIs, validators, and side effects entityChangeEvents: enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java new file mode 100644 index 00000000000000..4d03860cccb5ca --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java @@ -0,0 +1,21 @@ +package com.linkedin.gms.factory.entity.versioning; + +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.EntityVersioningServiceImpl; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class EntityVersioningServiceFactory { + + @Bean(name = "entityVersioningService") + @Nonnull + protected EntityVersioningService createInstance(EntityService entityService) { + + return new EntityVersioningServiceImpl(entityService); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 3229f12f9021d0..8f389eccc4cf8f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.config.GraphQLConcurrencyConfiguration; import com.linkedin.metadata.connection.ConnectionService; import 
com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.SiblingGraphService; @@ -205,7 +206,8 @@ public class GraphQLEngineFactory { @Nonnull protected GraphQLEngine graphQLEngine( @Qualifier("entityClient") final EntityClient entityClient, - @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient) { + @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient, + final EntityVersioningService entityVersioningService) { GmsGraphQLEngineArgs args = new GmsGraphQLEngineArgs(); args.setEntityClient(entityClient); args.setSystemEntityClient(systemEntityClient); @@ -255,6 +257,7 @@ protected GraphQLEngine graphQLEngine( configProvider.getGraphQL().getQuery().isIntrospectionEnabled()); args.setGraphQLQueryDepthLimit(configProvider.getGraphQL().getQuery().getDepthLimit()); args.setBusinessAttributeService(businessAttributeService); + args.setEntityVersioningService(entityVersioningService); args.setConnectionService(_connectionService); args.setAssertionService(assertionService); return new GmsGraphQLEngine(args).builder().build(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 2349dbd169f1d9..7d0937663fecb0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -6,6 +6,9 @@ import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.hooks.IgnoreUnknownMutator; @@ -16,6 +19,9 @@ import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; import com.linkedin.metadata.aspect.validation.FieldPathValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; +import com.linkedin.metadata.entity.versioning.sideeffects.VersionSetSideEffect; +import com.linkedin.metadata.entity.versioning.validation.VersionPropertiesValidator; +import com.linkedin.metadata.entity.versioning.validation.VersionSetPropertiesValidator; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; import com.linkedin.metadata.structuredproperties.validation.HidePropertyValidator; import com.linkedin.metadata.structuredproperties.validation.ShowPropertyAsBadgeValidator; @@ -32,6 +38,13 @@ @Slf4j public class SpringStandardPluginConfiguration { private static final String ALL = "*"; + private static final String UPSERT = "UPSERT"; + private static final String UPDATE = "UPDATE"; + private static final String CREATE = "CREATE"; + private static final String CREATE_ENTITY = "CREATE_ENTITY"; + private static final String PATCH = "PATCH"; + private static final String DELETE = "DELETE"; + private static final String RESTATE = "RESTATE"; @Value("${metadataChangeProposal.validation.ignoreUnknown}") private boolean ignoreUnknownEnabled; @@ -189,4 +202,58 @@ public AspectPayloadValidator showPropertyAsAssetBadgeValidator() { .build())) .build()); } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public AspectPayloadValidator 
versionPropertiesValidator() { + return new VersionPropertiesValidator() + .setConfig( + AspectPluginConfig.builder() + .className(VersionPropertiesValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(VERSION_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public AspectPayloadValidator versionSetPropertiesValidator() { + return new VersionSetPropertiesValidator() + .setConfig( + AspectPluginConfig.builder() + .className(VersionSetPropertiesValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(VERSION_SET_ENTITY_NAME) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public MCPSideEffect versionSetSideEffect() { + return new VersionSetSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className(VersionSetSideEffect.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(VERSION_SET_ENTITY_NAME) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } } diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java index 68b13bd5fb4ee8..07557ece381a0a 100644 --- 
a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java +++ b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java @@ -3,11 +3,13 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +import com.linkedin.gms.factory.config.ConfigurationProvider; import io.datahubproject.openapi.config.OpenAPIAnalyticsTestConfiguration; import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.v2.generated.controller.DatahubUsageEventsApiController; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Import; import org.springframework.http.HttpStatus; @@ -22,6 +24,8 @@ public class DatahubUsageEventsImplTest extends AbstractTestNGSpringContextTests @Autowired private DatahubUsageEventsApiController analyticsController; + @MockBean private ConfigurationProvider configurationProvider; + @Test public void initTest() { assertNotNull(analyticsController); diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java index 2beb210e5bc4ff..31b35b65ea1a8c 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java @@ -4,6 +4,7 @@ import static org.testng.Assert.*; import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; import io.datahubproject.openapi.config.SpringWebConfig; @@ -38,6 +39,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Import; import org.springframework.http.HttpStatus; @@ -68,6 +70,7 @@ public void disableAssert() { @Autowired private DatasetApiController datasetApiController; @Autowired private EntityRegistry entityRegistry; @Autowired private MockMvc mockMvc; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index c756827cad56ba..01493d71643481 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -4,6 +4,9 @@ import io.datahubproject.metadata.exception.ActorAccessException; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; import java.util.Map; import 
javax.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; @@ -64,4 +67,25 @@ public static ResponseEntity> handleUnauthorizedException( public static ResponseEntity> actorAccessException(ActorAccessException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); } + + @Override + protected void logException(Exception ex, HttpServletRequest request) { + log.error("Error while resolving request: " + request.getRequestURI(), ex); + } + + @Override + protected void sendServerError( + Exception ex, HttpServletRequest request, HttpServletResponse response) throws IOException { + log.error("Error while resolving request: " + request.getRequestURI(), ex); + request.setAttribute("jakarta.servlet.error.exception", ex); + response.sendError(500); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity> handleGenericException( + Exception e, HttpServletRequest request) { + log.error("Unhandled exception occurred for request: " + request.getRequestURI(), e); + return new ResponseEntity<>( + Map.of("error", "Internal server error occurred"), HttpStatus.INTERNAL_SERVER_ERROR); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java index 622cf20af9ff57..c4b4431e77c4ef 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; 
import io.datahubproject.openapi.v3.OpenAPIV3Generator; @@ -81,13 +82,15 @@ public void addFormatters(FormatterRegistry registry) { } @Bean - public GroupedOpenApi v3OpenApiGroup(final EntityRegistry entityRegistry) { + public GroupedOpenApi v3OpenApiGroup( + final EntityRegistry entityRegistry, final ConfigurationProvider configurationProvider) { return GroupedOpenApi.builder() .group("10-openapi-v3") .displayName("DataHub v3 (OpenAPI)") .addOpenApiCustomizer( openApi -> { - OpenAPI v3OpenApi = OpenAPIV3Generator.generateOpenApiSpec(entityRegistry); + OpenAPI v3OpenApi = + OpenAPIV3Generator.generateOpenApiSpec(entityRegistry, configurationProvider); openApi.setInfo(v3OpenApi.getInfo()); openApi.setTags(Collections.emptyList()); openApi.getPaths().putAll(v3OpenApi.getPaths()); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index c6b8d579d879e0..f7764f2ddb39a1 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -1,11 +1,14 @@ package io.datahubproject.openapi.v3; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static io.datahubproject.openapi.util.ReflectionCache.toUpperFirst; import com.fasterxml.jackson.databind.JsonNode; import com.github.fge.processing.ProcessingUtil; import com.google.common.collect.ImmutableMap; import com.linkedin.data.avro.SchemaTranslator; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -64,7 +67,8 @@ public class 
OpenAPIV3Generator { private static final String ASPECTS = "Aspects"; private static final String ENTITIES = "Entities"; - public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { + public static OpenAPI generateOpenApiSpec( + EntityRegistry entityRegistry, ConfigurationProvider configurationProvider) { final Set aspectNames = entityRegistry.getAspectSpecs().keySet(); final Set entityNames = entityRegistry.getEntitySpecs().values().stream() @@ -125,22 +129,25 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { buildAspectRefResponseSchema(upperAspectName)); }); + List definedEntitySpecs = + entityRegistry.getEntitySpecs().values().stream() + .filter(entitySpec -> definitionNames.contains(entitySpec.getName())) + .sorted(Comparator.comparing(EntitySpec::getName)) + .collect(Collectors.toList()); // --> Entity components - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> aspectNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String entityName = toUpperFirst(e.getName()); - components.addSchemas( - entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); - components.addSchemas( - entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); - components.addSchemas( - "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); - components.addSchemas( - "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, - buildEntityBatchGetRequestSchema(e, aspectNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String entityName = toUpperFirst(e.getName()); + components.addSchemas( + entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); + components.addSchemas( + entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); + components.addSchemas( + "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); + components.addSchemas( + "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, 
+ buildEntityBatchGetRequestSchema(e, aspectNames)); + }); components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); // TODO: Correct handling of SystemMetadata and AuditStamp @@ -151,14 +158,12 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { // Parameters // --> Entity Parameters - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String parameterName = toUpperFirst(e.getName()) + ASPECTS; - components.addParameters( - parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String parameterName = toUpperFirst(e.getName()) + ASPECTS; + components.addParameters( + parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); + }); addExtraParameters(components); @@ -169,39 +174,56 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { paths.addPathItem("/v3/entity/scroll", buildGenericListEntitiesPath()); // --> Entity Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - paths.addPathItem( - String.format("/v3/entity/%s", e.getName().toLowerCase()), - buildListEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), - buildBatchGetEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), - buildSingleEntityPath(e)); - }); + definedEntitySpecs.forEach( + e -> { + paths.addPathItem( + String.format("/v3/entity/%s", e.getName().toLowerCase()), buildListEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), + buildBatchGetEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), + 
buildSingleEntityPath(e)); + }); // --> Aspect Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - e.getAspectSpecs().stream() - .filter(a -> definitionNames.contains(a.getName())) - .sorted(Comparator.comparing(AspectSpec::getName)) - .forEach( - a -> - paths.addPathItem( - String.format( - "/v3/entity/%s/{urn}/%s", - e.getName().toLowerCase(), a.getName().toLowerCase()), - buildSingleEntityAspectPath(e, a))); - }); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + + // --> Link & Unlink APIs + if (configurationProvider.getFeatureFlags().isEntityVersioning()) { + definedEntitySpecs.stream() + .filter(entitySpec -> VERSION_SET_ENTITY_NAME.equals(entitySpec.getName())) + .forEach( + entitySpec -> { + paths.addPathItem( + "/v3/entity/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + buildVersioningRelationshipPath()); + }); + } + return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); } @@ -1198,4 +1220,115 @@ private static PathItem buildSingleEntityAspectPath( .post(postOperation) .patch(patchOperation); } + + private static Schema buildVersionPropertiesRequestSchema() { + return new Schema<>() + .type(TYPE_OBJECT) + 
.description("Properties for creating a version relationship") + .properties( + Map.of( + "comment", + new Schema<>() + .type(TYPE_STRING) + .description("Comment about the version") + .nullable(true), + "label", + new Schema<>() + .type(TYPE_STRING) + .description("Label for the version") + .nullable(true), + "sourceCreationTimestamp", + new Schema<>() + .type(TYPE_INTEGER) + .description("Timestamp when version was created in source system") + .nullable(true), + "sourceCreator", + new Schema<>() + .type(TYPE_STRING) + .description("Creator of version in source system") + .nullable(true))); + } + + private static PathItem buildVersioningRelationshipPath() { + final PathItem result = new PathItem(); + + // Common parameters for path + final List parameters = + List.of( + new Parameter() + .in(NAME_PATH) + .name("versionSetUrn") + .description("The Version Set URN to unlink from") + .required(true) + .schema(new Schema().type(TYPE_STRING)), + new Parameter() + .in(NAME_PATH) + .name("entityUrn") + .description("The Entity URN to be unlinked") + .required(true) + .schema(new Schema().type(TYPE_STRING))); + + // Success response for DELETE + final ApiResponse successDeleteResponse = + new ApiResponse() + .description("Successfully unlinked entity from version set") + .content(new Content().addMediaType("application/json", new MediaType())); + + // DELETE operation + final Operation deleteOperation = + new Operation() + .summary("Unlink an entity from a version set") + .description("Removes the version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .responses( + new ApiResponses() + .addApiResponse("200", successDeleteResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + // Success response for POST + final ApiResponse successPostResponse = + new ApiResponse() + .description("Successfully linked entity to version set") + .content( + new 
Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + toUpperFirst(VERSION_PROPERTIES_ASPECT_NAME), + ASPECT_RESPONSE_SUFFIX))))); + + // Request body for POST + final RequestBody requestBody = + new RequestBody() + .description("Version properties for the link operation") + .required(true) + .content( + new Content() + .addMediaType( + "application/json", + new MediaType().schema(buildVersionPropertiesRequestSchema()))); + + // POST operation + final Operation postOperation = + new Operation() + .summary("Link an entity to a version set") + .description("Creates a version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .requestBody(requestBody) + .responses( + new ApiResponses() + .addApiResponse("201", successPostResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + return result.delete(deleteOperation).post(postOperation); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index af13cd3aab0510..a4583082d57c7f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -1,7 +1,9 @@ package io.datahubproject.openapi.v3.controller; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import 
com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; @@ -11,22 +13,28 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.ProposedItem; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.SortCriterion; @@ -71,9 +79,12 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.util.CollectionUtils; +import org.springframework.web.bind.annotation.DeleteMapping; +import 
org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; @@ -89,6 +100,9 @@ public class EntityController extends GenericEntitiesController< GenericAspectV3, GenericEntityV3, GenericEntityScrollResultV3> { + @Autowired private final EntityVersioningService entityVersioningService; + @Autowired private final ConfigurationProvider configurationProvider; + @Tag(name = "Generic Entities") @PostMapping(value = "/{entityName}/batchGet", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get a batch of entities") @@ -222,6 +236,111 @@ public ResponseEntity scrollEntities( entityAspectsBody.getAspects() != null)); } + @Tag(name = "EntityVersioning") + @PostMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Link an Entity to a Version Set as the latest version") + public ResponseEntity> linkLatestVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull VersionPropertiesInput versionPropertiesInput) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type 
Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "linkLatestVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + + return ResponseEntity.ok( + buildEntityList( + opContext, + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput), + false)); + } + + @Tag(name = "EntityVersioning") + @DeleteMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Unlink the latest linked version of an entity") + public ResponseEntity> unlinkVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set 
urn %s must be of type Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "unlinkVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + List rollbackResults = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + + return ResponseEntity.ok( + rollbackResults.stream() + .map(rollbackResult -> rollbackResult.getUrn().toString()) + .collect(Collectors.toList())); + } + @Override public GenericEntityScrollResultV3 buildScrollResult( @Nonnull OperationContext opContext, @@ -361,7 +480,10 @@ protected List buildEntityList( .auditStamp( withSystemMetadata ? 
ingest.getRequest().getAuditStamp() : null) .build())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + // Map merge strategy, just take latest one + .collect( + Collectors.toMap( + Map.Entry::getKey, Map.Entry::getValue, (value1, value2) -> value2)); responseList.add( GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap)); } diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java index e1568017156d9b..d8f04b60455abb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java @@ -6,6 +6,8 @@ import static org.testng.Assert.assertTrue; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import io.swagger.v3.core.util.Yaml; import io.swagger.v3.oas.models.OpenAPI; @@ -36,8 +38,10 @@ public void testOpenApiSpecBuilder() throws Exception { OpenAPIV3GeneratorTest.class .getClassLoader() .getResourceAsStream("entity-registry.yml")); + ConfigurationProvider configurationProvider = new ConfigurationProvider(); + configurationProvider.setFeatureFlags(new FeatureFlags()); - OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er); + OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er, configurationProvider); String openapiYaml = Yaml.pretty(openAPI); Files.write( Path.of(getClass().getResource("/").getPath(), "open-api.yaml"), diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java 
b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 952dc31c5ba386..e82ab50a0defeb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -33,9 +33,12 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.dataset.DatasetProfile; import com.linkedin.entity.Aspect; import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.gms.factory.entity.versioning.EntityVersioningServiceFactory; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; @@ -57,6 +60,7 @@ import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.servlet.ServletException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -81,7 +85,11 @@ @SpringBootTest(classes = {SpringWebConfig.class}) @ComponentScan(basePackages = {"io.datahubproject.openapi.v3.controller"}) -@Import({SpringWebConfig.class, EntityControllerTest.EntityControllerTestConfig.class}) +@Import({ + SpringWebConfig.class, + EntityControllerTest.EntityControllerTestConfig.class, + EntityVersioningServiceFactory.class +}) @AutoConfigureWebMvc @AutoConfigureMockMvc public class EntityControllerTest extends AbstractTestNGSpringContextTests { @@ -92,6 +100,7 @@ public class EntityControllerTest extends AbstractTestNGSpringContextTests { @Autowired private TimeseriesAspectService 
mockTimeseriesAspectService; @Autowired private EntityRegistry entityRegistry; @Autowired private OperationContext opContext; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { @@ -431,4 +440,211 @@ public TimeseriesAspectService timeseriesAspectService() { return timeseriesAspectService; } } + + @Test + public void testGetEntityBatchWithMultipleEntities() throws Exception { + List TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + // Mock entity aspect response + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))), + TEST_URNS.get(1), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + String requestBody = + String.format( + "[{\"urn\": \"%s\"}, {\"urn\": \"%s\"}]", + TEST_URNS.get(0).toString(), TEST_URNS.get(1).toString()); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].urn").value(TEST_URNS.get(0).toString())) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].urn").value(TEST_URNS.get(1).toString())); + } + + @Test(expectedExceptions = ServletException.class) + public void testGetEntityBatchWithInvalidUrn() throws Exception { + String requestBody = "[{\"urn\": \"invalid:urn\"}]"; + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + 
.andExpect(status().is4xxClientError()); + } + + @Test + public void testScrollEntitiesWithMultipleSortFields() throws Exception { + List TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray( + List.of( + new SearchEntity().setEntity(TEST_URNS.get(0)), + new SearchEntity().setEntity(TEST_URNS.get(1))))); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + nullable(String.class), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("sortCriteria", "name", "urn") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect( + MockMvcResultMatchers.jsonPath("$.entities[0].urn").value(TEST_URNS.get(0).toString())); + } + + @Test + public void testScrollEntitiesWithPitKeepAlive() throws Exception { + List TEST_URNS = + List.of(UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray(List.of(new SearchEntity().setEntity(TEST_URNS.get(0))))) + .setScrollId("test-scroll-id"); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + 
eq("10m"), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("pitKeepAlive", "10m") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$.scrollId").value("test-scroll-id")); + } + + @Test(expectedExceptions = ServletException.class) + public void testEntityVersioningFeatureFlagDisabled() throws Exception { + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + Urn VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:test-version-set"); + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(false); + + // Test linking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } + + @Test(expectedExceptions = ServletException.class) + public void testInvalidVersionSetUrn() throws Exception { + Urn TEST_URN = 
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + String INVALID_VERSION_SET_URN = "urn:li:dataset:invalid-version-set"; + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(true); + + // Test linking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 432c4a9ddcb73f..af11532ccf4ece 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -1382,6 +1382,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 45e91873de10ff..f58d83dd1e5cb7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", @@ -6139,6 +6143,12 @@ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.", "default" : true, "optional" : true + }, { + "name" : "filterNonLatestVersions", + "type" : "boolean", + "doc" : "Include only latest versions in version sets, default true", + "default" : true, + "optional" : true } ] }, { "type" : "enum", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 9061cbff188135..61c31f93987b88 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index e6be4e828c976f..75793be7331da4 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, 
{ + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 10f3218d469757..58ba2ad05dfe74 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java new file mode 100644 index 00000000000000..9e82efa913a98d --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java @@ -0,0 +1,36 @@ +package com.linkedin.metadata.entity.versioning; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; + +public interface EntityVersioningService { + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. 
Create Version Properties for specified entity. 3. Generate version + * properties with the properly set latest version Will eventually want to add in the scheme here + * as a parameter + * + * @return ingestResult -> the results of the ingested linked version + */ + List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties); + + /** + * Unlinks the latest version from a version set. Will attempt to set up the previous version as + * the new latest. This fully removes the version properties and unversions the specified entity. + * + * @param opContext operational context containing various information about the current execution + * @param currentLatest the currently linked latest versioned entity urn + * @return the deletion result + */ + List unlinkVersion(OperationContext opContext, Urn versionSet, Urn currentLatest); +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java new file mode 100644 index 00000000000000..28c320ec717201 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java @@ -0,0 +1,20 @@ +package com.linkedin.metadata.entity.versioning; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) +@AllArgsConstructor +public class VersionPropertiesInput { + private String comment; + private String version; + private Long sourceCreationTimestamp; + private String sourceCreator; +} diff --git 
a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 7e9d1701bf79a9..4cd9ec6c75b786 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -87,6 +87,14 @@ public static Filter newDisjunctiveFilter(@Nonnull Criterion... orCriterion) { .collect(Collectors.toCollection(ConjunctiveCriterionArray::new))); } + @Nonnull + public static Filter newConjunctiveFilter(@Nonnull Criterion... andCriterion) { + ConjunctiveCriterionArray orCriteria = new ConjunctiveCriterionArray(); + orCriteria.add( + new ConjunctiveCriterion().setAnd(new CriterionArray(Arrays.asList(andCriterion)))); + return new Filter().setOr(orCriteria); + } + @Nonnull public static ConjunctiveCriterion add( @Nonnull ConjunctiveCriterion conjunctiveCriterion, @Nonnull Criterion element) { diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 80a11ab98bbf4a..3c623f8df7c1bf 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -749,6 +749,14 @@ public class PoliciesConfig { EDIT_ENTITY_TAGS_PRIVILEGE, EDIT_ENTITY_GLOSSARY_TERMS_PRIVILEGE)); + // Version Set privileges + public static final ResourcePrivileges VERSION_SET_PRIVILEGES = + ResourcePrivileges.of( + "versionSet", + "Version Set", + "A logical collection of versioned entities.", + COMMON_ENTITY_PRIVILEGES); + public static final List ENTITY_RESOURCE_PRIVILEGES = ImmutableList.of( DATASET_PRIVILEGES, @@ -767,7 +775,8 @@ public class PoliciesConfig { DATA_PRODUCT_PRIVILEGES, 
ER_MODEL_RELATIONSHIP_PRIVILEGES, BUSINESS_ATTRIBUTE_PRIVILEGES, - STRUCTURED_PROPERTIES_PRIVILEGES); + STRUCTURED_PROPERTIES_PRIVILEGES, + VERSION_SET_PRIVILEGES); // Merge all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = diff --git a/smoke-test/tests/entity_versioning/__init__.py b/smoke-test/tests/entity_versioning/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/entity_versioning/test_versioning.py b/smoke-test/tests/entity_versioning/test_versioning.py new file mode 100644 index 00000000000000..c331cc5305a336 --- /dev/null +++ b/smoke-test/tests/entity_versioning/test_versioning.py @@ -0,0 +1,64 @@ +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def test_link_unlink_version(auth_session): + """Fixture to execute setup before and tear down after all tests are run""" + res_data = link_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["linkAssetVersion"] + assert ( + res_data["data"]["linkAssetVersion"] + == "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)" + ) + + res_data = unlink_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["unlinkAssetVersion"] + + +def link_version(auth_session): + json = { + "mutation": """mutation linkAssetVersion($input: LinkVersionInput!) 
{\n + linkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "version": "1233456", + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "linkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() + + +def unlink_version(auth_session): + json = { + "mutation": """mutation unlinkAssetVersion($input: UnlinkVersionInput!) {\n + unlinkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "unlinkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() diff --git a/test-models/build.gradle b/test-models/build.gradle index e8733f0525870b..89bf4ec445440d 100644 --- a/test-models/build.gradle +++ b/test-models/build.gradle @@ -18,3 +18,4 @@ idea { } sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') +spotlessJava.dependsOn generateTestDataTemplate \ No newline at end of file From b252f782c56327175a1a0bddc95b5c417db285a1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 14 Jan 2025 17:04:49 -0800 Subject: [PATCH 6/8] feat(build): use remote gradle cache (#12344) --- settings.gradle | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/settings.gradle b/settings.gradle index 77d0706549a439..437a353f210ac4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -79,6 +79,20 @@ include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' +buildCache { + def depotSecret = System.getenv('DEPOT_TOKEN'); + + remote(HttpBuildCache) { + 
url = 'https://cache.depot.dev' + enabled = depotSecret != null + push = true + credentials { + username = '' + password = depotSecret + } + } +} + def installPreCommitHooks() { def preCommitInstalled = false try { @@ -116,7 +130,7 @@ def installPreCommitHooks() { def stderr = new StringBuilder() installHooksProcess.waitForProcessOutput(stdout, stderr) if (installHooksProcess.exitValue() != 0) { - println "Failed to install hooks: ${stderr}" + println "Failed to install hooks: ${stdout}" return } println "Hooks output: ${stdout}" From a0575329848d65eafb455a3f400e8f47bc7e9bb7 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:35:36 -0600 Subject: [PATCH 7/8] feat(docker-profiles): version mixing & docs (#12342) --- docker/build.gradle | 6 +----- docker/profiles/README.md | 28 +++++++++++++++++++++++++- docker/profiles/docker-compose.gms.yml | 16 +++++++-------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/docker/build.gradle b/docker/build.gradle index 576e47a53e6ef5..0070d814286cf0 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,7 +42,6 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], - 'quickstartDebugConsumers': [ profile: 'debug-consumers', modules: python_services_modules + backend_profile_modules + [':datahub-frontend', @@ -50,7 +49,6 @@ ext { ':metadata-jobs:mae-consumer-job'], isDebug: true ], - 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ @@ -108,9 +106,7 @@ dockerCompose { } // Common environment variables - environment.put 'DATAHUB_VERSION', config.isDebug ? 
- System.getenv("DATAHUB_VERSION") ?: "v${version}" : - "v${version}" + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}" environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' environment.put "METADATA_TESTS_ENABLED", "true" environment.put "DATAHUB_REPO", "${docker_registry}" diff --git a/docker/profiles/README.md b/docker/profiles/README.md index fb3c9e3c84a7a2..192fde3130a895 100644 --- a/docker/profiles/README.md +++ b/docker/profiles/README.md @@ -101,4 +101,30 @@ Runs everything except for the GMS. Useful for running just a local (non-docker) | debug-cassandra | | | X | | X | X | X | X | | | X | X | | | debug-consumers | X | | | | X | X | X | X | X | X | X | X | | | debug-neo4j | X | | | X | X | X | X | X | | | X | X | | -| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | \ No newline at end of file +| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | + +## Advanced Setups + +### Version Mixing + +In some cases, it might be useful to debug upgrade scenarios where there are intentional version mismatches. It is possible +to override individual component versions. + +Note: This only works for `non-debug` profiles because of the file mounts when in `debug` which would run older containers +but still pick up the latest application jars. + +In this example we are interested in upgrading two components (the `mae-consumer` and the `mce-consumer`) to a fresh build `v0.15.1-SNAPSHOT` +while maintaining older components on `v0.14.1` (especially the `system-update` container). + +This configuration reproduces the situation where the consumers were upgraded prior to running the latest version of `system-update`. In this +scenario we expect the consumers to block their startup waiting for the successful completion of a newer `system-update`.
+ +`DATAHUB_VERSION` - specifies the default component version of `v0.14.1` +`DATAHUB_MAE_VERSION` - specifies an override of just the `mae-consumer` to version `v0.15.1-SNAPSHOT`[1] +`DATAHUB_MCE_VERSION` - specifies an override of just the `mce-consumer` to version `v0.15.1-SNAPSHOT`[1] + +```shell + DATAHUB_MAE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_MCE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_VERSION="v0.14.1" ./gradlew quickstart +``` + +[1] Image versions were `v0.15.1-SNAPSHOT` built locally prior to running the command. diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index ada7df51e20bef..2147d6b5a0247f 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -54,7 +54,7 @@ x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env ################################# x-datahub-system-update-service: &datahub-system-update-service hostname: datahub-system-update - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-head}} command: - -u - SystemUpdate @@ -73,7 +73,7 @@ x-datahub-system-update-service: &datahub-system-update-service x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -92,7 +92,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: 
${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-head}} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -118,7 +118,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -150,7 +150,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9091:9091 env_file: @@ -163,7 +163,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} @@ -178,7 +178,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: 
${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9090:9090 env_file: @@ -193,7 +193,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} From 3905c8ee4146c93a06653dbcd690775ae36bef0f Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:36:02 -0600 Subject: [PATCH 8/8] docs(async-api): addition to known issues (#12339) --- docs/how/updating-datahub.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 68b41c907c6ad6..eb5a792216d981 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,3 +1,8 @@ +# Known Issues + +- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. + + # Updating DataHub