From e1d57e3f213bef8ba863426c0fbd2de098f6c6f6 Mon Sep 17 00:00:00 2001 From: kevinkarchacryl Date: Tue, 14 Jan 2025 15:53:51 -0500 Subject: [PATCH 01/48] Super type dbt redshift (#12337) --- .../ingestion/source/redshift/redshift.py | 1 + .../datahub/ingestion/source/sql/sql_types.py | 2 +- .../tests/unit/test_dbt_source.py | 34 ++++++++++++++++++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 5371017a2a3212..9bfca941ce48fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -276,6 +276,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): "HLLSKETCH": NullType, "TIMETZ": TimeType, "VARBYTE": StringType, + "SUPER": NullType, } def get_platform_instance_id(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 9ec73a9af96dc5..1acf962d7c4750 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -93,7 +93,7 @@ "regtype": None, "regrole": None, "regnamespace": None, - "super": None, + "super": NullType, "uuid": StringType, "pg_lsn": None, "tsvector": None, # text search vector diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 0a869297837014..ff22ffedc6228f 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -9,7 +9,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.dbt import dbt_cloud from datahub.ingestion.source.dbt.dbt_cloud import DBTCloudConfig -from datahub.ingestion.source.dbt.dbt_common import 
DBTNode +from datahub.ingestion.source.dbt.dbt_common import ( + DBTNode, + DBTSourceReport, + NullTypeClass, + get_column_type, +) from datahub.ingestion.source.dbt.dbt_core import ( DBTCoreConfig, DBTCoreSource, @@ -461,3 +466,30 @@ def test_dbt_time_parsing() -> None: assert timestamp.tzinfo is not None and timestamp.tzinfo.utcoffset( timestamp ) == timedelta(0) + + +def test_get_column_type_redshift(): + report = DBTSourceReport() + dataset_name = "test_dataset" + + # Test 'super' type which should not show any warnings/errors + result_super = get_column_type(report, dataset_name, "super", "redshift") + assert isinstance(result_super.type, NullTypeClass) + assert ( + len(report.infos) == 0 + ), "No warnings should be generated for known SUPER type" + + # Test unknown type, which generates a warning but resolves to NullTypeClass + unknown_type = "unknown_type" + result_unknown = get_column_type(report, dataset_name, unknown_type, "redshift") + assert isinstance(result_unknown.type, NullTypeClass) + + # exact warning message for an unknown type + expected_context = f"{dataset_name} - {unknown_type}" + messages = [info for info in report.infos if expected_context in str(info.context)] + assert len(messages) == 1 + assert messages[0].title == "Unable to map column types to DataHub types" + assert ( + messages[0].message + == "Got an unexpected column type. The column's parsed field type will not be populated." 
+ ) From 90fe5b6cb71a953042ebba33af6d6431e6ae0046 Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Tue, 14 Jan 2025 14:26:59 -0800 Subject: [PATCH 02/48] fix(docker) add new gradle profile for consumer debug purpose (#12338) --- docker/build.gradle | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/build.gradle b/docker/build.gradle index 7b36c0d9acdcf0..576e47a53e6ef5 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,6 +42,15 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], + + 'quickstartDebugConsumers': [ + profile: 'debug-consumers', + modules: python_services_modules + backend_profile_modules + [':datahub-frontend', + ':metadata-jobs:mce-consumer-job', + ':metadata-jobs:mae-consumer-job'], + isDebug: true + ], + 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ From 94b9da0bd8d9c04a5566a3c731f2f5418fc3eb0a Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 14 Jan 2025 17:28:34 -0600 Subject: [PATCH 03/48] feat(entityVersioning): initial implementation (#12166) --- .../datahub/graphql/GmsGraphQLEngine.java | 15 + .../datahub/graphql/GmsGraphQLEngineArgs.java | 2 + .../versioning/LinkAssetVersionResolver.java | 88 +++ .../UnlinkAssetVersionResolver.java | 67 ++ .../src/main/resources/entity.graphql | 60 ++ .../LinkAssetVersionResolverTest.java | 102 +++ .../UnlinkAssetVersionResolverTest.java | 123 ++++ docker/profiles/docker-compose.gms.yml | 8 + .../metadata/aspect/AspectRetriever.java | 16 +- .../aspect/CachingAspectRetriever.java | 6 + .../linkedin/metadata/aspect/ReadItem.java | 1 + .../patch/template/AspectTemplateEngine.java | 20 +- .../common/VersionPropertiesTemplate.java | 44 ++ .../metadata/entity/SearchRetriever.java | 52 +- .../registry/SnapshotEntityRegistry.java | 2 + .../metadata/aspect/MockAspectRetriever.java | 19 +- .../java/com/linkedin/metadata/Constants.java | 13 + 
metadata-io/build.gradle | 1 + .../client/EntityClientAspectRetriever.java | 19 + .../entity/EntityServiceAspectRetriever.java | 12 + .../AlphanumericSortIdGenerator.java | 70 ++ .../EntityVersioningServiceImpl.java | 356 +++++++++++ .../sideeffects/VersionSetSideEffect.java | 137 ++++ .../VersionPropertiesValidator.java | 158 +++++ .../VersionSetPropertiesValidator.java | 80 +++ .../search/SearchServiceSearchRetriever.java | 31 +- .../SearchDocumentTransformer.java | 2 +- .../metadata/search/utils/ESUtils.java | 36 ++ .../service/UpdateIndicesService.java | 3 +- .../AlphanumericSortIdGeneratorTest.java | 62 ++ .../EntityVersioningServiceTest.java | 603 ++++++++++++++++++ .../sideeffects/VersionSetSideEffectTest.java | 229 +++++++ .../VersionPropertiesValidatorTest.java | 165 +++++ .../VersionSetPropertiesValidatorTest.java | 139 ++++ .../AutocompleteRequestHandlerTest.java | 156 +++++ .../request/SearchRequestHandlerTest.java | 244 ++++++- .../SearchDocumentTransformerTest.java | 19 + .../com/linkedin/common/VersionProperties.pdl | 77 +++ .../com/linkedin/common/VersionTag.pdl | 1 + .../linkedin/metadata/key/VersionSetKey.pdl | 20 + .../linkedin/metadata/query/SearchFlags.pdl | 5 + .../versionset/VersionSetProperties.pdl | 24 + .../src/main/resources/entity-registry.yml | 7 + .../graphql/featureflags/FeatureFlags.java | 1 + .../src/main/resources/application.yaml | 1 + .../EntityVersioningServiceFactory.java | 21 + .../factory/graphql/GraphQLEngineFactory.java | 5 +- .../SpringStandardPluginConfiguration.java | 67 ++ .../delegates/DatahubUsageEventsImplTest.java | 4 + .../delegates/EntityApiDelegateImplTest.java | 3 + .../GlobalControllerExceptionHandler.java | 24 + .../openapi/config/SpringWebConfig.java | 7 +- .../openapi/v3/OpenAPIV3Generator.java | 243 +++++-- .../v3/controller/EntityController.java | 124 +++- .../openapi/v3/OpenAPIV3GeneratorTest.java | 6 +- .../v3/controller/EntityControllerTest.java | 218 ++++++- 
.../com.linkedin.entity.aspects.snapshot.json | 4 + ...com.linkedin.entity.entities.snapshot.json | 10 + .../com.linkedin.entity.runs.snapshot.json | 4 + ...nkedin.operations.operations.snapshot.json | 4 + ...m.linkedin.platform.platform.snapshot.json | 4 + .../versioning/EntityVersioningService.java | 36 ++ .../versioning/VersionPropertiesInput.java | 20 + .../metadata/search/utils/QueryUtils.java | 8 + .../authorization/PoliciesConfig.java | 11 +- .../tests/entity_versioning/__init__.py | 0 .../entity_versioning/test_versioning.py | 64 ++ test-models/build.gradle | 1 + 68 files changed, 4063 insertions(+), 121 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffect.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidator.java create mode 100644 
metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl rename {li-utils => metadata-models}/src/main/pegasus/com/linkedin/common/VersionTag.pdl (78%) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java create mode 100644 smoke-test/tests/entity_versioning/__init__.py create mode 100644 smoke-test/tests/entity_versioning/test_versioning.py diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3c46c1a8dce35c..b15db80a8487ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -174,6 +174,8 @@ import 
com.linkedin.datahub.graphql.resolvers.embed.UpdateEmbedResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityExistsResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityPrivilegesResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.LinkAssetVersionResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.UnlinkAssetVersionResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchAssignFormResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchRemoveFormResolver; import com.linkedin.datahub.graphql.resolvers.form.CreateDynamicFormAssignmentResolver; @@ -391,6 +393,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -476,6 +479,7 @@ public class GmsGraphQLEngine { private final RestrictedService restrictedService; private ConnectionService connectionService; private AssertionService assertionService; + private final EntityVersioningService entityVersioningService; private final BusinessAttributeService businessAttributeService; private final FeatureFlags featureFlags; @@ -599,6 +603,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.restrictedService = args.restrictedService; this.connectionService = args.connectionService; this.assertionService = args.assertionService; + this.entityVersioningService = args.entityVersioningService; this.businessAttributeService = args.businessAttributeService; this.ingestionConfiguration = Objects.requireNonNull(args.ingestionConfiguration); @@ -1392,6 +1397,16 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { 
"removeBusinessAttribute", new RemoveBusinessAttributeResolver(this.entityService)); } + if (featureFlags.isEntityVersioning()) { + typeWiring + .dataFetcher( + "linkAssetVersion", + new LinkAssetVersionResolver(this.entityVersioningService, this.featureFlags)) + .dataFetcher( + "unlinkAssetVersion", + new UnlinkAssetVersionResolver( + this.entityVersioningService, this.featureFlags)); + } return typeWiring; }); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index f6ab3a603dbb7b..131f4e87637807 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -88,6 +89,7 @@ public class GmsGraphQLEngineArgs { BusinessAttributeService businessAttributeService; ConnectionService connectionService; AssertionService assertionService; + EntityVersioningService entityVersioningService; // any fork specific args should go below this line } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java new file mode 100644 index 00000000000000..69e049af1e87b7 --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -0,0 +1,88 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.apache.commons.lang.StringUtils; + +/** + * Currently only supports linking the latest version, but may be modified later to support inserts + */ +public class LinkAssetVersionResolver implements DataFetcher> { + + private final EntityVersioningService entityVersioningService; + private final FeatureFlags featureFlags; + + public LinkAssetVersionResolver( + EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { + this.entityVersioningService = entityVersioningService; + this.featureFlags = featureFlags; + } + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + 
final QueryContext context = environment.getContext(); + final LinkVersionInput input = + bindArgument(environment.getArgument("input"), LinkVersionInput.class); + if (!featureFlags.isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet()); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet())); + } + Urn entityUrn = UrnUtils.getUrn(input.getLinkedEntity()); + OperationContext opContext = context.getOperationContext(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new AuthorizationException( + String.format( + "%s is unauthorized to %s entities %s and %s", + opContext.getAuthentication().getActor().toUrnStr(), + UPDATE, + input.getVersionSet(), + input.getLinkedEntity())); + } + VersionPropertiesInput versionPropertiesInput = + new VersionPropertiesInput( + input.getComment(), + input.getVersion(), + input.getSourceTimestamp(), + input.getSourceCreator()); + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + List linkResults = + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput); + + return linkResults.stream() + .filter( + ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString())) + .map(ingestResult -> ingestResult.getUrn().toString()) + .findAny() + .orElse(StringUtils.EMPTY); + }, + this.getClass().getSimpleName(), + "get"); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java new file mode 100644 
index 00000000000000..3d5027a0d668ac --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java @@ -0,0 +1,67 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.concurrent.CompletableFuture; + +public class UnlinkAssetVersionResolver implements DataFetcher> { + + private final EntityVersioningService entityVersioningService; + private final FeatureFlags featureFlags; + + public UnlinkAssetVersionResolver( + EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { + this.entityVersioningService = entityVersioningService; + this.featureFlags = featureFlags; + } + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + if (!featureFlags.isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + final QueryContext context = environment.getContext(); + final 
UnlinkVersionInput input = + bindArgument(environment.getArgument("input"), UnlinkVersionInput.class); + Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet()); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet())); + } + Urn entityUrn = UrnUtils.getUrn(input.getUnlinkedEntity()); + OperationContext opContext = context.getOperationContext(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new AuthorizationException( + String.format( + "%s is unauthorized to %s entities %s and %s", + opContext.getAuthentication().getActor(), + UPDATE, + input.getVersionSet(), + input.getUnlinkedEntity())); + } + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + return true; + }, + this.getClass().getSimpleName(), + "get"); + } +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 9dd1948e18e042..b47be7ae32b2c4 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,6 +956,16 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean + + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): String + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -12911,6 +12921,56 @@ input ListBusinessAttributesInput { query: String } +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! 
+ + """ + Version Tag label for the version, should be unique within a Version Set + """ + version: String! + + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..690856263fccc5 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetchingEnvironment; +import 
org.mockito.Mockito; +import org.testng.annotations.Test; + +public class LinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + IngestResult mockResult = + IngestResult.builder().urn(Urn.createFromString(TEST_ENTITY_URN)).build(); + + Mockito.when( + mockService.linkLatestVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + any(VersionPropertiesInput.class))) + .thenReturn(ImmutableList.of(mockResult)); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + input.setComment("Test comment"); + input.setVersion("v1"); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + String result = resolver.get(mockEnv).get(); + assertEquals(result, TEST_ENTITY_URN); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute 
resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..0000ad24a04537 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -0,0 +1,123 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import 
com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class UnlinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.when( + mockService.unlinkVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)))) + .thenReturn(Collections.emptyList()); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService) + .unlinkVersion( + any(), eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), eq(UrnUtils.getUrn(TEST_ENTITY_URN))); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + 
EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetServiceException() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.doThrow(new RuntimeException("Service error")) + .when(mockService) + .unlinkVersion(any(), any(), any()); + + UnlinkAssetVersionResolver resolver = new 
UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 64163ef970080a..ada7df51e20bef 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -67,6 +67,7 @@ x-datahub-system-update-service: &datahub-system-update-service SCHEMA_REGISTRY_SYSTEM_UPDATE: ${SCHEMA_REGISTRY_SYSTEM_UPDATE:-true} SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS: ${SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS:-true} SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION: ${SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins @@ -80,6 +81,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev SKIP_ELASTICSEARCH_CHECK: false REPROCESS_DEFAULT_BROWSE_PATHS_V2: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003' + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ../../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/ - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources @@ -101,6 +103,7 @@ x-datahub-gms-service: &datahub-gms-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] 
ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s @@ -131,6 +134,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev SEARCH_SERVICE_ENABLE_CACHE: false LINEAGE_SEARCH_CACHE_ENABLED: false SHOW_BROWSE_V2: true + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml @@ -155,12 +159,14 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service - ${DATAHUB_LOCAL_MAE_ENV:-empty2.env} environment: &datahub-mae-consumer-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh - ../../metadata-models/src/main/resources/:/datahub/datahub-mae-consumer/resources @@ -183,12 +189,14 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev 
<<: *datahub-mce-consumer-service image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh - ../../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java index e34df7db481189..87939e14bfde68 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java @@ -5,11 +5,9 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.util.Pair; import java.util.Collections; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -50,19 +48,7 @@ default SystemAspect getLatestSystemAspect( Map> getLatestSystemAspects(Map> urnAspectNames); @Nonnull - default Map entityExists(Set urns) { - Set keyAspectNames = - urns.stream() - .map(Urn::getEntityType) - .distinct() - .map(entityType -> getEntityRegistry().getEntitySpec(entityType).getKeyAspectName()) - .collect(Collectors.toSet()); - - Map> latest = getLatestAspectObjects(urns, keyAspectNames); - return urns.stream() - .map(urn -> Pair.of(urn, latest.containsKey(urn))) - .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); - } + Map entityExists(Set urns); @Nonnull EntityRegistry getEntityRegistry(); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java 
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java index 375dd8cf8911e1..7b3233921d039e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -29,6 +29,12 @@ public Map> getLatestSystemAspects( return Collections.emptyMap(); } + @Nonnull + @Override + public Map entityExists(Set urns) { + return Collections.emptyMap(); + } + @Nonnull @Override public EntityRegistry getEntityRegistry() { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java index 106596bf80ccf0..341dec4d4741c7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java @@ -36,6 +36,7 @@ default String getAspectName() { @Nullable RecordTemplate getRecordTemplate(); + @Nullable default T getAspect(Class clazz) { return getAspect(clazz, getRecordTemplate()); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java index ce36b7e77a2b16..821dad13aa0c3c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java @@ -1,20 +1,6 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.linkedin.metadata.Constants.CHART_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DASHBOARD_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATASET_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_FLOW_INFO_ASPECT_NAME; -import static 
com.linkedin.metadata.Constants.DATA_JOB_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PRODUCT_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; -import static com.linkedin.metadata.Constants.FORM_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.GLOSSARY_TERMS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.OWNERSHIP_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.UPSTREAM_LINEAGE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.linkedin.data.template.RecordTemplate; @@ -50,7 +36,9 @@ public class AspectTemplateEngine { DASHBOARD_INFO_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, - FORM_INFO_ASPECT_NAME) + FORM_INFO_ASPECT_NAME, + UPSTREAM_LINEAGE_ASPECT_NAME, + VERSION_PROPERTIES_ASPECT_NAME) .collect(Collectors.toSet()); private final Map> _aspectTemplateMap; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java new file mode 100644 index 00000000000000..2f7d24e2cdb4b7 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java @@ -0,0 +1,44 @@ +package com.linkedin.metadata.aspect.patch.template.common; + +import com.fasterxml.jackson.databind.JsonNode; +import com.linkedin.common.VersionProperties; +import 
com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.patch.template.Template; +import javax.annotation.Nonnull; + +public class VersionPropertiesTemplate implements Template { + + public static final String IS_LATEST_FIELD = "isLatest"; + + @Override + public VersionProperties getSubtype(RecordTemplate recordTemplate) throws ClassCastException { + if (recordTemplate instanceof VersionProperties) { + return (VersionProperties) recordTemplate; + } + throw new ClassCastException("Unable to cast RecordTemplate to VersionProperties"); + } + + @Override + public Class getTemplateType() { + return VersionProperties.class; + } + + @Nonnull + @Override + public VersionProperties getDefault() { + throw new UnsupportedOperationException( + "Unable to generate default version properties, no sensible default for " + "version set."); + } + + @Nonnull + @Override + public JsonNode transformFields(JsonNode baseNode) { + return baseNode; + } + + @Nonnull + @Override + public JsonNode rebaseFields(JsonNode patched) { + return patched; + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java index d4894c97015f8f..19dc89d26cb1af 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -1,6 +1,10 @@ package com.linkedin.metadata.entity; +import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntityArray; import java.util.List; @@ -8,6 +12,40 @@ import javax.annotation.Nullable; public interface SearchRetriever { + + 
SearchFlags RETRIEVER_SEARCH_FLAGS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(false) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false); + + SearchFlags RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(true) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false) + .setFilterNonLatestVersions(false); + + /** + * Allows for configuring the sort, should only be used when sort specified is unique. More often + * the default is desirable to just use the urnSort + */ + ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags); + /** * Returns search results for the given entities, filtered and sorted. * @@ -17,11 +55,17 @@ public interface SearchRetriever { * @param count size of a page * @return result of the search */ - ScrollResult scroll( + default ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count); + int count) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + return scroll( + entities, filters, scrollId, count, ImmutableList.of(urnSort), RETRIEVER_SEARCH_FLAGS); + } SearchRetriever EMPTY = new EmptySearchRetriever(); @@ -32,7 +76,9 @@ public ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count) { + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags) { ScrollResult empty = new ScrollResult(); empty.setEntities(new SearchEntityArray()); empty.setNumEntities(0); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java 
b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java index 16df2d452a619e..f4d6799bb476f5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java @@ -12,6 +12,7 @@ import com.linkedin.metadata.aspect.patch.template.common.GlossaryTermsTemplate; import com.linkedin.metadata.aspect.patch.template.common.OwnershipTemplate; import com.linkedin.metadata.aspect.patch.template.common.StructuredPropertiesTemplate; +import com.linkedin.metadata.aspect.patch.template.common.VersionPropertiesTemplate; import com.linkedin.metadata.aspect.patch.template.dashboard.DashboardInfoTemplate; import com.linkedin.metadata.aspect.patch.template.dataflow.DataFlowInfoTemplate; import com.linkedin.metadata.aspect.patch.template.datajob.DataJobInfoTemplate; @@ -113,6 +114,7 @@ private AspectTemplateEngine populateTemplateEngine(Map aspe aspectSpecTemplateMap.put( STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, new StructuredPropertyDefinitionTemplate()); aspectSpecTemplateMap.put(FORM_INFO_ASPECT_NAME, new FormInfoTemplate()); + aspectSpecTemplateMap.put(VERSION_PROPERTIES_ASPECT_NAME, new VersionPropertiesTemplate()); return new AspectTemplateEngine(aspectSpecTemplateMap); } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 98a6d59004a92a..15f168f74a32df 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -20,11 +20,14 @@ import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; import 
org.mockito.Mockito; public class MockAspectRetriever implements CachingAspectRetriever { private final Map> data; private final Map> systemData = new HashMap<>(); + @Getter @Setter private EntityRegistry entityRegistry; public MockAspectRetriever(@Nonnull Map> data) { this.data = @@ -60,6 +63,7 @@ public MockAspectRetriever(@Nonnull Map> data) { .build()); } } + this.entityRegistry = Mockito.mock(EntityRegistry.class); } public MockAspectRetriever( @@ -71,6 +75,15 @@ public MockAspectRetriever(Urn propertyUrn, StructuredPropertyDefinition definit this(Map.of(propertyUrn, List.of(definition))); } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream().collect(Collectors.toMap(urn -> urn, data::containsKey)); + } + } + @Nonnull @Override public Map> getLatestAspectObjects( @@ -90,10 +103,4 @@ public Map> getLatestSystemAspects( .map(urn -> Pair.of(urn, systemData.get(urn))) .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } - - @Nonnull - @Override - public EntityRegistry getEntityRegistry() { - return Mockito.mock(EntityRegistry.class); - } } diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 01c33a2530efb5..463376edcdf259 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -128,6 +128,7 @@ public class Constants { public static final String INCIDENTS_SUMMARY_ASPECT_NAME = "incidentsSummary"; public static final String DOCUMENTATION_ASPECT_NAME = "documentation"; public static final String DATA_TRANSFORM_LOGIC_ASPECT_NAME = "dataTransformLogic"; + public static final String VERSION_PROPERTIES_ASPECT_NAME = "versionProperties"; // User public static final String CORP_USER_KEY_ASPECT_NAME = "corpUserKey"; @@ -464,6 +465,18 @@ public class Constants { // Incidents public static final String ENTITY_REF = 
"entities"; + // Version Set + public static final String VERSION_SET_ENTITY_NAME = "versionSet"; + public static final String VERSION_SET_KEY_ASPECT_NAME = "versionSetKey"; + public static final String VERSION_SET_PROPERTIES_ASPECT_NAME = "versionSetProperties"; + + // Versioning related + public static final String INITIAL_VERSION_SORT_ID = "AAAAAAAA"; + public static final String VERSION_SORT_ID_FIELD_NAME = "versionSortId"; + public static final String IS_LATEST_FIELD_NAME = "isLatest"; + + public static final String DISPLAY_PROPERTIES_ASPECT_NAME = "displayProperties"; + // Config public static final String ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH = "opensearch"; public static final String ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH = "elasticsearch"; diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 88bbfa2e10c4c1..aab29101b30f71 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -71,6 +71,7 @@ dependencies { testImplementation project(':datahub-graphql-core') testImplementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow') testImplementation project(':metadata-service:auth-impl') + testImplementation project(':li-utils') testImplementation externalDependency.testng testImplementation externalDependency.h2 testImplementation externalDependency.mysqlConnector diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index 669ec751f87c69..bb9a5ad68c959b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -56,6 +56,25 @@ public Map> getLatestAspectObjects( } } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream() + .collect( + 
Collectors.toMap( + urn -> urn, + urn -> { + try { + return entityClient.exists(systemOperationContext, urn); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + })); + } + } + @Nonnull @Override public Map> getLatestSystemAspects( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java index 50cf8af30d606a..6ecf83b874dea0 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java @@ -54,6 +54,18 @@ public Map> getLatestAspectObjects( } } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream() + .collect( + Collectors.toMap( + urn -> urn, urn -> entityService.exists(systemOperationContext, urn))); + } + } + @Nonnull @Override public Map> getLatestSystemAspects( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java new file mode 100644 index 00000000000000..40553b338741f8 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java @@ -0,0 +1,70 @@ +package com.linkedin.metadata.entity.versioning; + +public class AlphanumericSortIdGenerator { + + private AlphanumericSortIdGenerator() {} + + private static final int STRING_LENGTH = 8; + private static final char[] ALLOWED_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); + + /** + * Increments an 8-character alphanumeric string. 
For example: "AAAAAAAA" -> "AAAAAAAB" "AAAAAAAZ" + * -> "AAAAAABA" + * + * @param currentId The current 8-character string + * @return The next string in sequence + * @throws IllegalArgumentException if input string is not 8 characters or contains invalid + * characters + */ + public static String increment(String currentId) { + if (currentId == null || currentId.length() != STRING_LENGTH) { + throw new IllegalArgumentException("Input string must be exactly 8 characters long"); + } + + // Convert string to char array for manipulation + char[] currentIdChars = currentId.toCharArray(); + + // Validate input characters + for (char c : currentIdChars) { + if (getCharIndex(c) == -1) { + throw new IllegalArgumentException("Invalid character in input string: " + c); + } + } + + // Start from rightmost position + for (int i = STRING_LENGTH - 1; i >= 0; i--) { + int currentCharIndex = getCharIndex(currentIdChars[i]); + + // If current character is not the last allowed character, + // simply increment it and we're done + if (currentCharIndex < ALLOWED_CHARS.length - 1) { + currentIdChars[i] = ALLOWED_CHARS[currentCharIndex + 1]; + return new String(currentIdChars); + } + + // If we're here, we need to carry over to next position + currentIdChars[i] = ALLOWED_CHARS[0]; + + // If we're at the leftmost position and need to carry, + // we've reached maximum value and need to wrap around + if (i == 0) { + return "AAAAAAAA"; + } + } + + // Should never reach here + throw new RuntimeException("Unexpected error in increment operation"); + } + + /** + * Gets the index of a character in the ALLOWED_CHARS array. Returns -1 if character is not found. 
+ */ + private static int getCharIndex(char c) { + for (int i = 0; i < ALLOWED_CHARS.length; i++) { + if (ALLOWED_CHARS[i] == c) { + return i; + } + } + return -1; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java new file mode 100644 index 00000000000000..48f5a00e9e8d5a --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java @@ -0,0 +1,356 @@ +package com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SORT_ID_FIELD_NAME; +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; + +import com.datahub.util.RecordUtils; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.MetadataAttribution; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.VersionTag; +import com.linkedin.common.urn.CorpuserUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.IngestResult; +import 
com.linkedin.metadata.entity.RollbackResult; +import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EntityVersioningServiceImpl implements EntityVersioningService { + + private final EntityService entityService; + + public EntityVersioningServiceImpl(EntityService entityService) { + this.entityService = entityService; + } + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. Create Version Properties for specified entity. If this aspect + * already exists will fail. 3. 
Generate version properties with the properly set latest version + * Will eventually want to add in the scheme here as a parameter + * + * @return ingestResult -> the results of the ingested linked version + */ + @Override + public List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties) { + List proposals = new ArrayList<>(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); + String sortId; + Long versionSetConstraint; + Long versionPropertiesConstraint; + VersionSetKey versionSetKey = + (VersionSetKey) + EntityKeyUtils.convertUrnToEntityKey( + versionSet, opContext.getEntityRegistryContext().getKeyAspectSpec(versionSet)); + if (!versionSetKey.getEntityType().equals(newLatestVersion.getEntityType())) { + throw new IllegalArgumentException( + "Entity type must match Version Set's specified type: " + + versionSetKey.getEntityType() + + " invalid type: " + + newLatestVersion.getEntityType()); + } + if (!aspectRetriever.entityExists(ImmutableSet.of(versionSet)).get(versionSet)) { + MetadataChangeProposal versionSetKeyProposal = new MetadataChangeProposal(); + versionSetKeyProposal.setEntityUrn(versionSet); + versionSetKeyProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetKeyProposal.setAspectName(VERSION_SET_KEY_ASPECT_NAME); + versionSetKeyProposal.setAspect(GenericRecordUtils.serializeAspect(versionSetKey)); + versionSetKeyProposal.setChangeType(ChangeType.CREATE_ENTITY); + entityService.ingestProposal( + opContext, versionSetKeyProposal, opContext.getAuditStamp(), false); + + sortId = INITIAL_VERSION_SORT_ID; + versionSetConstraint = -1L; + versionPropertiesConstraint = -1L; + } else { + SystemAspect versionSetPropertiesAspect = + aspectRetriever.getLatestSystemAspect(versionSet, VERSION_SET_PROPERTIES_ASPECT_NAME); + VersionSetProperties versionSetProperties = + RecordUtils.toRecordTemplate( + VersionSetProperties.class, 
versionSetPropertiesAspect.getRecordTemplate().data()); + versionSetConstraint = + versionSetPropertiesAspect + .getSystemMetadataVersion() + .orElse(versionSetPropertiesAspect.getVersion()); + SystemAspect latestVersion = + aspectRetriever.getLatestSystemAspect( + versionSetProperties.getLatest(), VERSION_PROPERTIES_ASPECT_NAME); + VersionProperties latestVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, latestVersion.getRecordTemplate().data()); + versionPropertiesConstraint = + latestVersion.getSystemMetadataVersion().orElse(latestVersion.getVersion()); + // When more impls for versioning scheme are set up, this will need to be resolved to the + // correct scheme generation strategy + sortId = AlphanumericSortIdGenerator.increment(latestVersionProperties.getSortId()); + } + + SystemAspect currentVersionPropertiesAspect = + aspectRetriever.getLatestSystemAspect(newLatestVersion, VERSION_PROPERTIES_ASPECT_NAME); + if (currentVersionPropertiesAspect != null) { + VersionProperties currentVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, currentVersionPropertiesAspect.getRecordTemplate().data()); + if (currentVersionProperties.getVersionSet().equals(versionSet)) { + return new ArrayList<>(); + } else { + throw new IllegalStateException( + String.format( + "Version already exists for specified entity: %s for a different Version Set: %s", + newLatestVersion, currentVersionProperties.getVersionSet())); + } + } + + VersionTag versionTag = new VersionTag(); + versionTag.setVersionTag(inputProperties.getVersion()); + MetadataAttribution metadataAttribution = new MetadataAttribution(); + metadataAttribution.setActor(opContext.getActorContext().getActorUrn()); + metadataAttribution.setTime(System.currentTimeMillis()); + versionTag.setMetadataAttribution(metadataAttribution); + VersionProperties versionProperties = + new VersionProperties() + .setVersionSet(versionSet) + .setComment(inputProperties.getComment(), 
SetMode.IGNORE_NULL) + .setVersion(versionTag) + .setMetadataCreatedTimestamp(opContext.getAuditStamp()) + .setSortId(sortId); + if (inputProperties.getSourceCreationTimestamp() != null) { + + AuditStamp sourceCreatedAuditStamp = + new AuditStamp().setTime(inputProperties.getSourceCreationTimestamp()); + Urn actor = null; + if (inputProperties.getSourceCreator() != null) { + actor = new CorpuserUrn(inputProperties.getSourceCreator()); + } + sourceCreatedAuditStamp.setActor(UrnUtils.getActorOrDefault(actor)); + + versionProperties.setSourceCreatedTimestamp(sourceCreatedAuditStamp); + } + MetadataChangeProposal versionPropertiesProposal = new MetadataChangeProposal(); + versionPropertiesProposal.setEntityUrn(newLatestVersion); + versionPropertiesProposal.setEntityType(newLatestVersion.getEntityType()); + versionPropertiesProposal.setAspectName(VERSION_PROPERTIES_ASPECT_NAME); + versionPropertiesProposal.setAspect(GenericRecordUtils.serializeAspect(versionProperties)); + versionPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap headerMap = new StringMap(); + headerMap.put(HTTP_HEADER_IF_VERSION_MATCH, versionPropertiesConstraint.toString()); + versionPropertiesProposal.setChangeType(ChangeType.UPSERT); + proposals.add(versionPropertiesProposal); + + // Might want to refactor this to a Patch w/ Create if not exists logic if more properties get + // added + // to Version Set Properties + VersionSetProperties versionSetProperties = + new VersionSetProperties() + .setVersioningScheme( + VersioningScheme + .ALPHANUMERIC_GENERATED_BY_DATAHUB) // Only one available, will need to add to + // input properties once more are added. 
+ .setLatest(newLatestVersion); + MetadataChangeProposal versionSetPropertiesProposal = new MetadataChangeProposal(); + versionSetPropertiesProposal.setEntityUrn(versionSet); + versionSetPropertiesProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetPropertiesProposal.setAspectName(VERSION_SET_PROPERTIES_ASPECT_NAME); + versionSetPropertiesProposal.setAspect( + GenericRecordUtils.serializeAspect(versionSetProperties)); + versionSetPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap versionSetHeaderMap = new StringMap(); + versionSetHeaderMap.put(HTTP_HEADER_IF_VERSION_MATCH, versionSetConstraint.toString()); + versionSetPropertiesProposal.setHeaders(versionSetHeaderMap); + proposals.add(versionSetPropertiesProposal); + + return entityService.ingestProposal( + opContext, + AspectsBatchImpl.builder() + .mcps(proposals, opContext.getAuditStamp(), opContext.getRetrieverContext()) + .build(), + false); + } + + /** + * Unlinks a version from a version set. Will attempt to set up the previous version as the new + * latest. This fully removes the version properties and unversions the specified entity. 
  /**
   * Unlinks a version from a version set. Will attempt to set up the previous version as the new
   * latest. This fully removes the version properties and unversions the specified entity.
   *
   * @param opContext operational context containing various information about the current execution
   * @param versionSet the version set the entity is expected to be linked to
   * @param linkedVersion the currently linked latest versioned entity urn
   * @return the rollback results for every aspect deleted by this operation
   * @throws IllegalArgumentException if {@code linkedVersion} is linked to a different version set
   * @throws IllegalStateException if the version set has no Version Set Properties aspect
   */
  @Override
  public List<RollbackResult> unlinkVersion(
      OperationContext opContext, Urn versionSet, Urn linkedVersion) {
    List<RollbackResult> deletedAspects = new ArrayList<>();
    AspectRetriever aspectRetriever = opContext.getAspectRetriever();
    SystemAspect linkedVersionPropertiesAspect =
        aspectRetriever.getLatestSystemAspect(linkedVersion, VERSION_PROPERTIES_ASPECT_NAME);
    // Not currently versioned, do nothing
    if (linkedVersionPropertiesAspect == null) {
      return deletedAspects;
    }
    VersionProperties linkedVersionProperties =
        RecordUtils.toRecordTemplate(
            VersionProperties.class, linkedVersionPropertiesAspect.getRecordTemplate().data());
    Urn versionSetUrn = linkedVersionProperties.getVersionSet();
    // Guard against unlinking from the wrong version set: the caller-supplied set must match the
    // one recorded on the entity's own Version Properties.
    if (!versionSet.equals(versionSetUrn)) {
      throw new IllegalArgumentException(
          String.format(
              "Version is not linked to specified version set: %s but is linked to: %s",
              versionSet, versionSetUrn));
    }
    // Delete latest version properties (hard delete; returns a RollbackResult if an aspect existed)
    entityService
        .deleteAspect(
            opContext,
            linkedVersion.toString(),
            VERSION_PROPERTIES_ASPECT_NAME,
            Collections.emptyMap(),
            true)
        .ifPresent(deletedAspects::add);

    // Get Version Set details
    VersionSetKey versionSetKey =
        (VersionSetKey)
            EntityKeyUtils.convertUrnToEntityKey(
                versionSetUrn,
                opContext.getEntityRegistryContext().getKeyAspectSpec(versionSetUrn));
    SearchRetriever searchRetriever = opContext.getRetrieverContext().getSearchRetriever();

    // Find current latest version and previous: top two versions ordered by sort id, descending,
    // skipping the cache so we see recent writes.
    ScrollResult linkedVersions =
        searchRetriever.scroll(
            ImmutableList.of(versionSetKey.getEntityType()),
            QueryUtils.newConjunctiveFilter(
                CriterionUtils.buildCriterion(
                    "versionSet", Condition.EQUAL, versionSetUrn.toString())),
            null,
            2,
            ImmutableList.of(
                new SortCriterion()
                    .setField(VERSION_SORT_ID_FIELD_NAME)
                    .setOrder(SortOrder.DESCENDING)),
            SearchRetriever.RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS);
    String updatedLatestVersionUrn = null;

    SearchEntityArray linkedEntities = linkedVersions.getEntities();
    SystemAspect versionSetPropertiesAspect =
        aspectRetriever.getLatestSystemAspect(versionSetUrn, VERSION_SET_PROPERTIES_ASPECT_NAME);
    if (versionSetPropertiesAspect == null) {
      throw new IllegalStateException(
          String.format(
              "Version Set Properties must exist if entity version exists: %s", versionSetUrn));
    }
    VersionSetProperties versionSetProperties =
        RecordUtils.toRecordTemplate(
            VersionSetProperties.class, versionSetPropertiesAspect.getRecordTemplate().data());
    // Optimistic-concurrency token for the later conditional upsert of Version Set Properties.
    long versionConstraint =
        versionSetPropertiesAspect
            .getSystemMetadataVersion()
            .orElse(versionSetPropertiesAspect.getVersion());
    boolean isLatest = linkedVersion.equals(versionSetProperties.getLatest());

    if (linkedEntities.size() == 2 && isLatest) {
      // If the version to unlink is the same as the last search result and is currently the latest
      // based on SQL, set to one immediately before.
      // Otherwise set to most current one in search results assuming we have not gotten the index
      // update for a recent update to latest.
      // Does assume that there are not multiple index updates waiting in the queue so rapid fire
      // updates intermixed with deletes should be avoided.
      SearchEntity maybeLatestVersion = linkedEntities.get(0);
      if (maybeLatestVersion.getEntity().equals(linkedVersion)) {
        SearchEntity priorLatestVersion = linkedEntities.get(1);
        updatedLatestVersionUrn = priorLatestVersion.getEntity().toString();
      } else {
        updatedLatestVersionUrn = maybeLatestVersion.getEntity().toString();
      }

    } else if (linkedEntities.size() == 1 && isLatest) {
      // Missing a version; if that version is not the one being unlinked then set as latest
      // version. Same reasoning as above.
      SearchEntity maybePriorLatestVersion = linkedEntities.get(0);
      if (!linkedVersion.equals(maybePriorLatestVersion.getEntity())) {
        updatedLatestVersionUrn = maybePriorLatestVersion.getEntity().toString();
      } else {
        // Delete Version Set if we are removing the last version
        // TODO: Conditional deletes impl + only do the delete if version match
        RollbackRunResult deleteResult = entityService.deleteUrn(opContext, versionSetUrn);
        deletedAspects.addAll(deleteResult.getRollbackResults());
      }
    }
    // NOTE(review): when the unlinked version is NOT the latest, neither branch runs and the
    // version set pointer is intentionally left untouched — confirm this is the intended behavior.

    if (updatedLatestVersionUrn != null) {

      // Might want to refactor this to a Patch w/ Create if not exists logic if more properties
      // get added to Version Set Properties
      VersionSetProperties newVersionSetProperties =
          new VersionSetProperties()
              .setVersioningScheme(
                  VersioningScheme
                      .ALPHANUMERIC_GENERATED_BY_DATAHUB) // Only one available, will need to add
              // to input properties once more are
              // added.
              .setLatest(UrnUtils.getUrn(updatedLatestVersionUrn));
      MetadataChangeProposal versionSetPropertiesProposal = new MetadataChangeProposal();
      versionSetPropertiesProposal.setEntityUrn(versionSetUrn);
      versionSetPropertiesProposal.setEntityType(VERSION_SET_ENTITY_NAME);
      versionSetPropertiesProposal.setAspectName(VERSION_SET_PROPERTIES_ASPECT_NAME);
      versionSetPropertiesProposal.setAspect(
          GenericRecordUtils.serializeAspect(newVersionSetProperties));
      versionSetPropertiesProposal.setChangeType(ChangeType.UPSERT);
      // Conditional write: only apply if Version Set Properties is still at the version we read.
      StringMap headerMap = new StringMap();
      headerMap.put(HTTP_HEADER_IF_VERSION_MATCH, Long.toString(versionConstraint));
      versionSetPropertiesProposal.setHeaders(headerMap);
      entityService.ingestProposal(
          opContext,
          AspectsBatchImpl.builder()
              .mcps(
                  ImmutableList.of(versionSetPropertiesProposal),
                  opContext.getAuditStamp(),
                  opContext.getRetrieverContext())
              .build(),
          false);
    }

    return deletedAspects;
  }
package com.linkedin.metadata.entity.versioning.sideeffects;

import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME;
import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME;

import com.linkedin.common.urn.Urn;
import com.linkedin.metadata.aspect.RetrieverContext;
import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.aspect.batch.MCLItem;
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.patch.GenericJsonPatch;
import com.linkedin.metadata.aspect.patch.PatchOperationType;
import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect;
import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.versionset.VersionSetProperties;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.Accessors;
import lombok.extern.slf4j.Slf4j;

/**
 * Side effect that updates the isLatest property for the referenced versioned entity's Version
 * Properties aspect.
 */
@Slf4j
@Getter
@Setter
@Accessors(chain = true)
public class VersionSetSideEffect extends MCPSideEffect {
  @Nonnull private AspectPluginConfig config;

  @Override
  protected Stream<ChangeMCP> applyMCPSideEffect(
      Collection<ChangeMCP> changeMCPS, @Nonnull RetrieverContext retrieverContext) {
    // No pre-commit side effects; all work happens after the MCL has been produced.
    return Stream.of();
  }

  @Override
  protected Stream<MCPItem> postMCPSideEffect(
      Collection<MCLItem> mclItems, @Nonnull RetrieverContext retrieverContext) {
    return mclItems.stream().flatMap(item -> updateLatest(item, retrieverContext));
  }

  private static Stream<MCPItem> updateLatest(
      MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) {

    // Only Version Set Properties changes can move the "latest" pointer.
    if (!VERSION_SET_PROPERTIES_ASPECT_NAME.equals(mclItem.getAspectName())) {
      return Stream.empty();
    }

    VersionSetProperties versionSetProperties = mclItem.getAspect(VersionSetProperties.class);
    if (versionSetProperties == null) {
      log.error("Unable to process version set properties for urn: {}", mclItem.getUrn());
      return Stream.empty();
    }

    // Set old latest isLatest to false, set new latest isLatest to true.
    // This side effect assumes the entity is already versioned; if it is not yet versioned it
    // will fail due to not having set default values for the aspect. This creates an implicit
    // ordering of when aspects should be updated: Version Properties first, then Version Set
    // Properties.
    Urn newLatest = versionSetProperties.getLatest();
    List<MCPItem> patches = new ArrayList<>();

    VersionSetProperties previousVersionSetProperties =
        mclItem.getPreviousAspect(VersionSetProperties.class);
    if (previousVersionSetProperties != null) {
      Urn previousLatest = previousVersionSetProperties.getLatest();
      boolean previousStillExists =
          retrieverContext
              .getAspectRetriever()
              .entityExists(Collections.singleton(previousLatest))
              .getOrDefault(previousLatest, false);
      // Demote the old latest only when the pointer actually moved and the old entity exists.
      if (!newLatest.equals(previousLatest) && previousStillExists) {
        patches.add(buildIsLatestPatch(previousLatest, false, mclItem, retrieverContext));
      }
    }

    // Explicitly error here to avoid downstream patch error with less context
    if (retrieverContext
            .getAspectRetriever()
            .getLatestAspectObject(newLatest, VERSION_PROPERTIES_ASPECT_NAME)
        == null) {
      throw new UnsupportedOperationException(
          "Cannot set latest version to unversioned entity: " + newLatest);
    }

    patches.add(buildIsLatestPatch(newLatest, true, mclItem, retrieverContext));
    return patches.stream();
  }

  /**
   * Builds a single-operation JSON patch that sets /isLatest on the target entity's Version
   * Properties aspect, carrying over the triggering MCL's audit stamp and system metadata.
   */
  private static MCPItem buildIsLatestPatch(
      Urn targetUrn,
      boolean isLatest,
      MCLItem mclItem,
      @Nonnull RetrieverContext retrieverContext) {
    EntitySpec entitySpec =
        retrieverContext
            .getAspectRetriever()
            .getEntityRegistry()
            .getEntitySpec(targetUrn.getEntityType());
    GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp();
    patchOp.setOp(PatchOperationType.ADD.getValue());
    patchOp.setPath("/isLatest");
    patchOp.setValue(isLatest);
    return PatchItemImpl.builder()
        .urn(targetUrn)
        .entitySpec(entitySpec)
        .aspectName(VERSION_PROPERTIES_ASPECT_NAME)
        .aspectSpec(entitySpec.getAspectSpec(VERSION_PROPERTIES_ASPECT_NAME))
        .patch(GenericJsonPatch.builder().patch(List.of(patchOp)).build().getJsonPatch())
        .auditStamp(mclItem.getAuditStamp())
        .systemMetadata(mclItem.getSystemMetadata())
        .build(retrieverContext.getAspectRetriever().getEntityRegistry());
  }
}
/**
 * Validates Version Properties aspects on write. Proposal-time checks reject client-supplied
 * computed fields; pre-commit checks verify the referenced Version Set exists, matches the
 * entity's type, and that the sort id conforms to the set's versioning scheme.
 */
@Setter
@Getter
@Slf4j
@Accessors(chain = true)
public class VersionPropertiesValidator extends AspectPayloadValidator {

  @Nonnull private AspectPluginConfig config;

  // Change types that represent a client-proposed write and therefore need proposal validation.
  private static final Set<ChangeType> SHOULD_VALIDATE_PROPOSED =
      ImmutableSet.of(ChangeType.UPDATE, ChangeType.UPSERT, ChangeType.CREATE);

  @Override
  protected Stream<AspectValidationException> validateProposedAspects(
      @Nonnull Collection<? extends BatchItem> mcpItems,
      @Nonnull RetrieverContext retrieverContext) {
    return validatePropertiesProposals(
        mcpItems.stream()
            .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName()))
            .filter(mcpItem -> SHOULD_VALIDATE_PROPOSED.contains(mcpItem.getChangeType()))
            .collect(Collectors.toList()));
  }

  @Override
  protected Stream<AspectValidationException> validatePreCommitAspects(
      @Nonnull Collection<ChangeMCP> changeMCPs, @Nonnull RetrieverContext retrieverContext) {
    return validatePropertiesUpserts(
        changeMCPs.stream()
            .filter(changeMCP -> VERSION_PROPERTIES_ASPECT_NAME.equals(changeMCP.getAspectName()))
            .collect(Collectors.toList()),
        retrieverContext);
  }

  /**
   * Pre-commit validation: the referenced Version Set must exist, its key's entity type must match
   * the versioned entity, and the sort id must conform to the set's versioning scheme (when the
   * set's properties already exist).
   */
  @VisibleForTesting
  public static Stream<AspectValidationException> validatePropertiesUpserts(
      @Nonnull Collection<? extends BatchItem> mcpItems,
      @Nonnull RetrieverContext retrieverContext) {
    ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection();
    for (BatchItem mcpItem : mcpItems) {
      VersionProperties versionProperties = mcpItem.getAspect(VersionProperties.class);
      // Validate Version Set entity type
      Urn versionSetUrn = versionProperties.getVersionSet();
      // Fetch both the key and properties aspects of the referenced version set in one call.
      Map<String, Aspect> aspects =
          retrieverContext
              .getAspectRetriever()
              .getLatestAspectObjects(
                  Collections.singleton(versionSetUrn),
                  ImmutableSet.of(VERSION_SET_KEY_ASPECT_NAME, VERSION_SET_PROPERTIES_ASPECT_NAME))
              .get(versionSetUrn);
      if (aspects == null || aspects.isEmpty()) {
        exceptions.addException(mcpItem, "Version Set specified does not exist: " + versionSetUrn);
        continue;
      }
      Optional<Aspect> keyAspect = Optional.ofNullable(aspects.get(VERSION_SET_KEY_ASPECT_NAME));
      if (keyAspect.isPresent()) {
        VersionSetKey versionSetKey =
            RecordUtils.toRecordTemplate(VersionSetKey.class, keyAspect.get().data());
        if (!mcpItem.getEntitySpec().getName().equals(versionSetKey.getEntityType())) {
          exceptions.addException(
              mcpItem,
              "Version Set specified entity type does not match, expected type: "
                  + versionSetKey.getEntityType());
        }

        // Validate sort ID scheme
        // NOTE(review): the sort-id check is intentionally nested inside the key-present branch,
        // so it is skipped entirely when the key aspect is missing — confirm this is intended.
        String sortId = versionProperties.getSortId();
        Optional<Aspect> versionSetPropertiesAspect =
            Optional.ofNullable(aspects.get(VERSION_SET_PROPERTIES_ASPECT_NAME));
        // Validate sort id matches scheme if version set properties exist
        if (versionSetPropertiesAspect.isPresent()) {
          VersionSetProperties versionSetProperties =
              RecordUtils.toRecordTemplate(
                  VersionSetProperties.class, versionSetPropertiesAspect.get().data());
          VersioningScheme versioningScheme = versionSetProperties.getVersioningScheme();
          switch (versioningScheme) {
            case ALPHANUMERIC_GENERATED_BY_DATAHUB:
              validateDataHubGeneratedScheme(sortId, exceptions, mcpItem);
              break;
            default:
              exceptions.addException(mcpItem, "Unsupported scheme type: " + versioningScheme);
          }
        }
      } else {
        exceptions.addException(mcpItem, "Version Set specified does not exist: " + versionSetUrn);
      }
    }
    return exceptions.streamAllExceptions();
  }

  // DataHub-generated sort ids are exactly 8 uppercase ASCII letters (see
  // AlphanumericSortIdGenerator, which only emits A-Z).
  private static void validateDataHubGeneratedScheme(
      String sortId, ValidationExceptionCollection exceptions, BatchItem mcpItem) {
    if (!(sortId.length() == 8
        && StringUtils.isAllUpperCase(sortId)
        && StringUtils.isAlpha(sortId))) {
      exceptions.addException(
          mcpItem,
          "Invalid sortID for Versioning Scheme. ID: "
              + sortId
              + " Scheme: "
              + VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB);
    }
  }

  /**
   * Proposal-time validation: rejects client-set computed fields. isLatest is maintained by
   * VersionSetSideEffect and must never be supplied by the caller.
   */
  @VisibleForTesting
  public static Stream<AspectValidationException> validatePropertiesProposals(
      @Nonnull Collection<? extends BatchItem> mcpItems) {
    ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection();
    for (BatchItem mcpItem : mcpItems) {
      // Patch items are filtered out upstream (SHOULD_VALIDATE_PROPOSED excludes PATCH); seeing
      // one here indicates a mis-typed change type.
      if (mcpItem instanceof PatchItemImpl) {
        throw new IllegalStateException("Patch item must have change type of PATCH.");
      }
      VersionProperties versionProperties = mcpItem.getAspect(VersionProperties.class);
      // Validate isLatest not set
      if (versionProperties.hasIsLatest()) {
        exceptions.addException(
            mcpItem, "IsLatest should not be specified, this is a computed field.");
      }
    }
    return exceptions.streamAllExceptions();
  }
}
+import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.versionset.VersionSetProperties; +import java.util.Collection; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class VersionSetPropertiesValidator extends AspectPayloadValidator { + + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return validatePropertiesUpserts( + mcpItems.stream() + .filter(i -> VERSION_SET_PROPERTIES_ASPECT_NAME.equals(i.getAspectName())) + .collect(Collectors.toList()), + retrieverContext); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } + + @VisibleForTesting + public static Stream validatePropertiesUpserts( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + VersionSetProperties versionSetProperties = mcpItem.getAspect(VersionSetProperties.class); + Optional aspect = + Optional.ofNullable( + retrieverContext + .getAspectRetriever() + .getLatestAspectObject(mcpItem.getUrn(), VERSION_SET_PROPERTIES_ASPECT_NAME)); + if (aspect.isPresent()) { + VersionSetProperties previousVersionSetProperties = + RecordUtils.toRecordTemplate(VersionSetProperties.class, aspect.get().data()); + if 
(!previousVersionSetProperties + .getVersioningScheme() + .equals(versionSetProperties.getVersioningScheme())) { + exceptions.addException( + mcpItem, + "Versioning Scheme cannot change. Expected Scheme: " + + previousVersionSetProperties.getVersioningScheme() + + " Provided Scheme: " + + versionSetProperties.getVersioningScheme()); + } + } + } + return exceptions.streamAllExceptions(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java index 8d7548e0ba90a1..dae119beec4a7e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -6,7 +6,9 @@ import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; import java.util.List; +import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -16,15 +18,6 @@ @Getter @Builder public class SearchServiceSearchRetriever implements SearchRetriever { - private static final SearchFlags RETRIEVER_SEARCH_FLAGS = - new SearchFlags() - .setFulltext(false) - .setMaxAggValues(20) - .setSkipCache(false) - .setSkipAggregates(true) - .setSkipHighlighting(true) - .setIncludeSoftDeleted(false) - .setIncludeRestricted(false); @Setter private OperationContext systemOperationContext; private final SearchService searchService; @@ -34,16 +27,24 @@ public ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count) { - SortCriterion urnSort = new SortCriterion(); - urnSort.setField("urn"); - urnSort.setOrder(SortOrder.ASCENDING); + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags) { + List 
finalCriteria = new ArrayList<>(sortCriteria); + if (sortCriteria.stream().noneMatch(sortCriterion -> "urn".equals(sortCriterion.getField()))) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + finalCriteria.add(urnSort); + } + final SearchFlags finalSearchFlags = + Optional.ofNullable(searchFlags).orElse(RETRIEVER_SEARCH_FLAGS); return searchService.scrollAcrossEntities( - systemOperationContext.withSearchFlags(flags -> RETRIEVER_SEARCH_FLAGS), + systemOperationContext.withSearchFlags(flags -> finalSearchFlags), entities, "*", filters, - List.of(urnSort), + finalCriteria, scrollId, null, count); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index b4ad847cb7afc2..7a60b89d0127cc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -147,7 +147,7 @@ private static Set objectFieldsFilter(Iterator fieldNames) { public Optional transformAspect( @Nonnull OperationContext opContext, final @Nonnull Urn urn, - final @Nonnull RecordTemplate aspect, + final @Nullable RecordTemplate aspect, final @Nonnull AspectSpec aspectSpec, final Boolean forDelete) throws RemoteInvocationException, URISyntaxException { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 17bbbaf059dec4..95fff81d13957c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -20,10 +20,12 @@ import com.linkedin.metadata.query.filter.Condition; import 
com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.utils.CriterionUtils; import io.datahubproject.metadata.context.OperationContext; import java.util.Collections; import java.util.HashMap; @@ -188,6 +190,13 @@ public static BoolQueryBuilder buildFilterQuery( }); finalQueryBuilder.should(andQueryBuilder); } + if (Boolean.TRUE.equals( + opContext.getSearchContext().getSearchFlags().isFilterNonLatestVersions())) { + BoolQueryBuilder filterNonLatestVersions = + ESUtils.buildFilterNonLatestEntities( + opContext, queryFilterRewriteChain, searchableFieldTypes); + finalQueryBuilder.must(filterNonLatestVersions); + } if (!finalQueryBuilder.should().isEmpty()) { finalQueryBuilder.minimumShouldMatch(1); } @@ -869,4 +878,31 @@ private static void filterSoftDeletedByDefault( } } } + + public static BoolQueryBuilder buildFilterNonLatestEntities( + OperationContext opContext, + QueryFilterRewriteChain queryFilterRewriteChain, + Map> searchableFieldTypes) { + ConjunctiveCriterion isLatestCriterion = new ConjunctiveCriterion(); + CriterionArray isLatestCriterionArray = new CriterionArray(); + isLatestCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EQUAL, "true")); + isLatestCriterion.setAnd(isLatestCriterionArray); + BoolQueryBuilder isLatest = + ESUtils.buildConjunctiveFilterQuery( + isLatestCriterion, false, searchableFieldTypes, opContext, queryFilterRewriteChain); + ConjunctiveCriterion isNotVersionedCriterion = new ConjunctiveCriterion(); + CriterionArray isNotVersionedCriterionArray = new CriterionArray(); + 
/**
 * Unit tests for AlphanumericSortIdGenerator#increment: 8-character A-Z sort ids that increment
 * like a base-26 odometer, wrapping from "ZZZZZZZZ" back to INITIAL_VERSION_SORT_ID ("AAAAAAAA").
 */
public class AlphanumericSortIdGeneratorTest {

  @Test
  public void testBasicIncrement() {
    assertEquals(AlphanumericSortIdGenerator.increment(INITIAL_VERSION_SORT_ID), "AAAAAAAB");
    assertEquals(AlphanumericSortIdGenerator.increment("AAAAAAAB"), "AAAAAAAC");
  }

  @Test
  public void testCarryOver() {
    // Incrementing past 'Z' carries into the next position, like digit carry in addition.
    assertEquals(AlphanumericSortIdGenerator.increment("AAAAAAAZ"), "AAAAAABA");
    assertEquals(AlphanumericSortIdGenerator.increment("AAAAAZZZ"), "AAAABAAA");
  }

  @Test
  public void testWrapAround() {
    // The maximum id wraps back to the initial id rather than growing a ninth character.
    assertEquals(AlphanumericSortIdGenerator.increment("ZZZZZZZZ"), INITIAL_VERSION_SORT_ID);
  }

  @Test(expectedExceptions = IllegalArgumentException.class)
  public void testInvalidLength() {
    AlphanumericSortIdGenerator.increment("AAA");
  }

  @Test(expectedExceptions = IllegalArgumentException.class)
  public void testInvalidCharacters() {
    AlphanumericSortIdGenerator.increment("AAAA$AAA");
  }

  // NOTE(review): expects IllegalArgumentException for null input — confirm the generator
  // null-checks explicitly rather than throwing NullPointerException.
  @Test(expectedExceptions = IllegalArgumentException.class)
  public void testNullInput() {
    AlphanumericSortIdGenerator.increment(null);
  }

  @Test
  public void testSequence() {
    String id = "AAAAAAAA";
    id = AlphanumericSortIdGenerator.increment(id);
    assertEquals(id, "AAAAAAAB");
    id = AlphanumericSortIdGenerator.increment(id);
    assertEquals(id, "AAAAAAAC");
    id = AlphanumericSortIdGenerator.increment(id);
    assertEquals(id, "AAAAAAAD");
  }

  // NOTE(review): duplicates testBasicIncrement's first assertion — consider consolidating.
  @Test
  public void testLowerBoundary() {
    assertEquals(AlphanumericSortIdGenerator.increment(INITIAL_VERSION_SORT_ID), "AAAAAAAB");
  }

  // NOTE(review): duplicates testWrapAround (modulo the constant) — consider consolidating.
  @Test
  public void testUpperBoundary() {
    assertEquals(AlphanumericSortIdGenerator.increment("ZZZZZZZZ"), "AAAAAAAA");
  }
}
com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.FabricType; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.VersionTag; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceAspectRetriever; +import com.linkedin.metadata.entity.RollbackResult; +import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.TestEntityRegistry; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; +import com.linkedin.metadata.models.registry.MergedEntityRegistry; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.snapshot.Snapshot; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import 
io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RetrieverContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class EntityVersioningServiceTest { + + private EntityVersioningServiceImpl versioningService; + private EntityService mockEntityService; + private OperationContext mockOpContext; + private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockCachingAspectRetriever; + private SearchRetriever mockSearchRetriever; + private static Urn TEST_VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + private static Urn TEST_DATASET_URN = + new DatasetUrn(new DataPlatformUrn("kafka"), "myDataset", FabricType.PROD); + private static Urn TEST_DATASET_URN_2 = + new DatasetUrn(new DataPlatformUrn("hive"), "myHiveDataset", FabricType.PROD); + private static Urn TEST_DATASET_URN_3 = + new DatasetUrn(new DataPlatformUrn("hive"), "myHiveDataset2", FabricType.PROD); + + @BeforeMethod + public void setup() throws EntityRegistryException { + mockEntityService = mock(EntityService.class); + final EntityRegistry snapshotEntityRegistry = new TestEntityRegistry(); + final EntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + Snapshot.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + final EntityRegistry testEntityRegistry = + new MergedEntityRegistry(snapshotEntityRegistry).apply(configEntityRegistry); + mockAspectRetriever = mock(EntityServiceAspectRetriever.class); + mockCachingAspectRetriever = mock(CachingAspectRetriever.class); + mockSearchRetriever = mock(SearchRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(testEntityRegistry); + 
mockOpContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> testEntityRegistry, + () -> + RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(mockSearchRetriever) + .cachingAspectRetriever(mockCachingAspectRetriever) + .build(), + null, + opContext -> + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) + .setSystemOperationContext(opContext), + null); + versioningService = new EntityVersioningServiceImpl(mockEntityService); + } + + @Test + public void testLinkLatestVersionNewVersionSet() throws Exception { + + VersionPropertiesInput input = + new VersionPropertiesInput("Test comment", "Test label", 123456789L, "testCreator"); + // Mock version set doesn't exist + when(mockAspectRetriever.entityExists(anySet())) + .thenReturn(Map.of(TEST_VERSION_SET_URN, false)); + + // Capture the proposals + ArgumentCaptor aspectsCaptor = ArgumentCaptor.forClass(AspectsBatch.class); + when(mockEntityService.ingestProposal(eq(mockOpContext), aspectsCaptor.capture(), eq(false))) + .thenReturn(List.of()); + + // Execute + versioningService.linkLatestVersion( + mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN, input); + + // Verify + List capturedAspects = aspectsCaptor.getAllValues(); + List versionPropertiesAspect = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionProperties.class)) + .collect(Collectors.toList()); + + // Verify VersionProperties has initial sort ID + VersionProperties versionProps = + (VersionProperties) + versionPropertiesAspect.stream() + .filter(a -> a instanceof VersionProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("VersionProperties not found")); + + assertEquals(versionProps.getSortId(), INITIAL_VERSION_SORT_ID); + assertEquals(versionProps.getComment(), "Test comment"); + 
assertEquals(versionProps.getVersionSet(), TEST_VERSION_SET_URN); + + List versionSetPropertiesAspect = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_SET_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionSetProperties.class)) + .collect(Collectors.toList()); + VersionSetProperties versionSetProperties = + (VersionSetProperties) + versionSetPropertiesAspect.stream() + .filter(aspect -> aspect instanceof VersionSetProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("Version Set Properties not found")); + assertEquals(versionSetProperties.getLatest(), TEST_DATASET_URN); + assertEquals( + versionSetProperties.getVersioningScheme(), + VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + } + + @Test + public void testLinkLatestVersionExistingVersionSet() throws Exception { + + VersionPropertiesInput input = + new VersionPropertiesInput("Test comment", "Label2", 123456789L, "testCreator"); + + // Mock version set exists + when(mockAspectRetriever.entityExists(anySet())).thenReturn(Map.of(TEST_VERSION_SET_URN, true)); + + // Mock existing version set properties + VersionSetProperties existingVersionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropertiesAspect = mock(SystemAspect.class); + when(mockVersionSetPropertiesAspect.getRecordTemplate()).thenReturn(existingVersionSetProps); + when(mockVersionSetPropertiesAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect(eq(TEST_VERSION_SET_URN), anyString())) + .thenReturn(mockVersionSetPropertiesAspect); + + // Mock existing version properties with a sort ID + VersionProperties existingVersionProps = + new VersionProperties() + .setSortId("AAAAAAAA") + .setVersion(new VersionTag().setVersionTag("Label1")) + .setVersionSet(TEST_VERSION_SET_URN); + 
SystemAspect mockVersionPropertiesAspect = mock(SystemAspect.class); + when(mockVersionPropertiesAspect.getRecordTemplate()).thenReturn(existingVersionProps); + when(mockVersionPropertiesAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect(eq(TEST_DATASET_URN), anyString())) + .thenReturn(mockVersionPropertiesAspect); + + // Capture the proposals + ArgumentCaptor aspectsCaptor = ArgumentCaptor.forClass(AspectsBatch.class); + when(mockEntityService.ingestProposal(eq(mockOpContext), aspectsCaptor.capture(), eq(false))) + .thenReturn(List.of()); + + // Execute + versioningService.linkLatestVersion( + mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2, input); + + // Verify + List capturedAspects = aspectsCaptor.getAllValues(); + List aspects = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionProperties.class)) + .collect(Collectors.toList()); + + // Verify VersionProperties has incremented sort ID + VersionProperties versionProps = + (VersionProperties) + aspects.stream() + .filter(a -> a instanceof VersionProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("VersionProperties not found")); + + assertEquals(versionProps.getSortId(), "AAAAAAAB"); + assertEquals(versionProps.getComment(), "Test comment"); + assertEquals(versionProps.getVersionSet(), TEST_VERSION_SET_URN); + } + + @Test + public void testUnlinkInitialVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId(INITIAL_VERSION_SORT_ID); + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + 
when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock delete aspect responses + RollbackResult versionSetDeleteResult = + new RollbackResult( + TEST_VERSION_SET_URN, + "versionSet", + VERSION_SET_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + RollbackRunResult rollbackRunResult = + new RollbackRunResult(new ArrayList<>(), 1, List.of(versionSetDeleteResult)); + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + + when(mockEntityService.deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN))) + .thenReturn(rollbackRunResult); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Execute + List results 
= + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertEquals(results.size(), 2); + verify(mockEntityService).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); + } + + @Test + public void testUnlinkLatestVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult 
= + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNotLatestVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_2), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = 
mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_2, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNotReturnedSingleVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + 
SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_2), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN_2); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_2, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + 
eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNotReturnedDoubleVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_3), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN_3); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + 
.thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_3, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_3); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_3.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNonVersionedEntity() throws Exception { + + // Mock no version properties aspect + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(null); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertTrue(results.isEmpty()); + verify(mockEntityService, never()).deleteAspect(any(), any(), any(), any(), anyBoolean()); + verify(mockEntityService, never()).deleteUrn(any(), any()); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java new file mode 100644 index 00000000000000..35445efaedc607 --- /dev/null +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java @@ -0,0 +1,229 @@ +package com.linkedin.metadata.entity.versioning.sideeffects; + +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static org.mockito.Mockito.mock; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import io.datahubproject.metadata.context.RetrieverContext; +import jakarta.json.JsonObject; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionSetSideEffectTest { + private static final TestEntityRegistry 
TEST_REGISTRY = new TestEntityRegistry(); + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + private static final Urn PREVIOUS_LATEST_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); + private static final Urn NEW_LATEST_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDatasetV2,PROD)"); + + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(VersionSetSideEffect.class.getName()) + .enabled(true) + .supportedOperations( + List.of("CREATE", "PATCH", "CREATE_ENTITY", "UPSERT", "DELETE", "RESTATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entityName(VERSION_SET_ENTITY_NAME) + .build())) + .build(); + + private MockAspectRetriever mockAspectRetriever; + private RetrieverContext retrieverContext; + private VersionSetSideEffect sideEffect; + + @BeforeMethod + public void setup() { + GraphRetriever graphRetriever = mock(GraphRetriever.class); + VersionProperties existingProperties = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setIsLatest(false) + .setSortId("AAAAAAAA"); + VersionProperties previousLatestProperties = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setIsLatest(true) + .setSortId("AAAAAAAB"); + Map> data = new HashMap<>(); + data.put(NEW_LATEST_URN, Collections.singletonList(existingProperties)); + data.put(PREVIOUS_LATEST_URN, Collections.singletonList(previousLatestProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + mockAspectRetriever.setEntityRegistry(TEST_REGISTRY); + + retrieverContext = + RetrieverContext.builder() + .searchRetriever(mock(SearchRetriever.class)) + .aspectRetriever(mockAspectRetriever) + .graphRetriever(graphRetriever) + .build(); + + sideEffect = new VersionSetSideEffect(); + 
sideEffect.setConfig(TEST_PLUGIN_CONFIG); + } + + @Test + public void testUpdateLatestVersion() { + // Create previous version set properties with different latest + VersionSetProperties previousProperties = new VersionSetProperties(); + previousProperties.setLatest(PREVIOUS_LATEST_URN); + previousProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Create new version set properties + VersionSetProperties newProperties = new VersionSetProperties(); + newProperties.setLatest(NEW_LATEST_URN); + newProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(VERSION_SET_ENTITY_NAME); + + // Create change item + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(TEST_VERSION_SET_URN) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(VERSION_SET_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + // Create MCL item with previous aspect + MCLItemImpl mclItem = + MCLItemImpl.builder() + .previousRecordTemplate(previousProperties) + .build(changeItem, previousProperties, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify results + assertEquals(sideEffectResults.size(), 2, "Expected two patch operations"); + + // Verify patch for previous latest version + MCPItem previousPatch = sideEffectResults.get(0); + assertEquals(previousPatch.getUrn(), PREVIOUS_LATEST_URN); + JsonObject previousPatchOp = + ((PatchItemImpl) previousPatch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(previousPatchOp.getString("op"), "add"); + assertEquals(previousPatchOp.getString("path"), "/isLatest"); + 
assertEquals(previousPatchOp.getBoolean("value"), false); + + // Verify patch for new latest version + MCPItem newPatch = sideEffectResults.get(1); + assertEquals(newPatch.getUrn(), NEW_LATEST_URN); + JsonObject newPatchOp = ((PatchItemImpl) newPatch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(newPatchOp.getString("op"), "add"); + assertEquals(newPatchOp.getString("path"), "/isLatest"); + assertEquals(newPatchOp.getBoolean("value"), true); + } + + @Test + public void testNoChangesWhenLatestRemainsSame() { + // Create version set properties with same latest + VersionSetProperties previousProperties = new VersionSetProperties(); + previousProperties.setLatest(NEW_LATEST_URN); + previousProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + VersionSetProperties newProperties = new VersionSetProperties(); + newProperties.setLatest(NEW_LATEST_URN); + newProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(VERSION_SET_ENTITY_NAME); + + // Create change item + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(TEST_VERSION_SET_URN) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(VERSION_SET_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + // Create MCL item with previous aspect + MCLItemImpl mclItem = + MCLItemImpl.builder() + .previousRecordTemplate(previousProperties) + .build(changeItem, null, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify results - should still get one patch to set isLatest=true on current latest + assertEquals(sideEffectResults.size(), 1, "Expected one patch 
operation"); + + // Verify patch operation + MCPItem patch = sideEffectResults.get(0); + assertEquals(patch.getUrn(), NEW_LATEST_URN); + JsonObject patchOp = ((PatchItemImpl) patch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(patchOp.getString("op"), "add"); + assertEquals(patchOp.getString("path"), "/isLatest"); + assertEquals(patchOp.getBoolean("value"), true); + } + + @Test + public void testNoChangesForNonVersionSetProperties() { + // Create some other type of aspect change + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(DATASET_ENTITY_NAME); + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(PREVIOUS_LATEST_URN) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .recordTemplate(new GlobalTags().setTags(new TagAssociationArray())) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + MCLItemImpl mclItem = + MCLItemImpl.builder().build(changeItem, null, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify no changes for non-version set properties aspects + assertEquals( + sideEffectResults.size(), 0, "Expected no changes for non-version set properties aspect"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java new file mode 100644 index 00000000000000..4afd05c98a9312 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java @@ -0,0 +1,165 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import static com.linkedin.metadata.Constants.CHART_ENTITY_NAME; + 
+import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionPropertiesValidatorTest { + + private static final String ENTITY_TYPE = "dataset"; + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(12356,dataset)"); + private static final Urn TEST_ENTITY_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); + + private SearchRetriever mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockSearchRetriever = Mockito.mock(SearchRetriever.class); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + + // Create version set key and properties + VersionSetKey versionSetKey = new VersionSetKey(); + versionSetKey.setEntityType(ENTITY_TYPE); + + VersionSetProperties versionSetProperties = new VersionSetProperties(); + 
versionSetProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Initialize mock aspect retriever with version set data + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Arrays.asList(versionSetKey, versionSetProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + @Test + public void testValidVersionProperties() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); // Valid 8-char uppercase alpha + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testInvalidSortId() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("123"); // Invalid - not 8 chars, not alpha + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Invalid sortID for Versioning Scheme")); + } + + @Test + public void testNonexistentVersionSet() { + Urn nonexistentUrn = UrnUtils.getUrn("urn:li:versionSet:(nonexistent,dataset)"); + + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(nonexistentUrn); + properties.setSortId("ABCDEFGH"); + + Stream validationResult = + 
VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Version Set specified does not exist")); + } + + @Test + public void testEntityTypeMismatch() { + // Create version set with different entity type + VersionSetKey wrongTypeKey = new VersionSetKey(); + wrongTypeKey.setEntityType(CHART_ENTITY_NAME); + + VersionSetProperties versionSetProperties = new VersionSetProperties(); + versionSetProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Arrays.asList(wrongTypeKey, versionSetProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue( + exception.getMessage().contains("Version Set specified entity type does not match")); + } + + @Test + public void testIsLatestFieldSpecified() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); + properties.setIsLatest(true); // Should not be specified + + Stream validationResult = + 
VersionPropertiesValidator.validatePropertiesProposals( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry())); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("IsLatest should not be specified")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java new file mode 100644 index 00000000000000..c91495271f6149 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java @@ -0,0 +1,139 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionSetPropertiesValidatorTest { + + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + + private SearchRetriever 
mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockSearchRetriever = Mockito.mock(SearchRetriever.class); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + + Map> emptyData = new HashMap<>(); + mockAspectRetriever = new MockAspectRetriever(emptyData); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + @Test + public void testValidUpsertWithNoExistingProperties() { + // Create version set properties + VersionSetProperties properties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Test validation with no existing properties + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, properties, new TestEntityRegistry()), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testValidUpsertWithSameVersioningScheme() { + // Create existing properties with semantic versioning + VersionSetProperties existingProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Set up mock retriever with existing properties + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Collections.singletonList(existingProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + // Create new properties with same versioning 
scheme + VersionSetProperties newProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Test validation + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, newProperties, new TestEntityRegistry()), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testInvalidUpsertWithDifferentVersioningScheme() { + // Create existing properties with semantic versioning + VersionSetProperties existingProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Set up mock retriever with existing properties + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Collections.singletonList(existingProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + // Create new properties with different versioning scheme + VersionSetProperties newProperties = + new VersionSetProperties().setVersioningScheme(VersioningScheme.$UNKNOWN); + + // Test validation + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, newProperties, new TestEntityRegistry()), + retrieverContext); + + // Assert validation exception exists + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Versioning Scheme cannot change")); + Assert.assertTrue( + exception.getMessage().contains("Expected Scheme: ALPHANUMERIC_GENERATED_BY_DATAHUB")); + 
Assert.assertTrue(exception.getMessage().contains("Provided Scheme: $UNKNOWN")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index c5205906e9d373..23d493b7287f78 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -1,9 +1,12 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -13,22 +16,35 @@ import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import 
com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.opensearch.action.search.SearchRequest; import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.MultiMatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import org.opensearch.search.builder.SearchSourceBuilder; @@ -40,6 +56,8 @@ public class AutocompleteRequestHandlerTest { private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + private OperationContext nonMockOpContext = + TestOperationContexts.systemContextNoSearchAuthorization(); static { testQueryConfig = new SearchConfiguration(); @@ -465,10 +483,148 @@ public void testCustomConfigWithFunctionScores() { assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.size() == 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + // bool -> filter -> [bool] -> must -> [bool] + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private static QueryBuilder extractNestedQuery(BoolQueryBuilder nested) { assertEquals(nested.should().size(), 1); BoolQueryBuilder firstLevel = (BoolQueryBuilder) nested.should().get(0); assertEquals(firstLevel.should().size(), 1); return firstLevel.should().get(0); } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + AutocompleteRequestHandler requestHandler = + AutocompleteRequestHandler.getBuilder( + entitySpec, + CustomSearchConfiguration.builder().build(), + 
QueryFilterRewriteChain.EMPTY, + testQueryConfig); + + return (BoolQueryBuilder) + ((FunctionScoreQueryBuilder) + requestHandler + .getSearchRequest( + mockOpContext.withSearchFlags( + flags -> + flags + .setFulltext(false) + .setFilterNonLatestVersions(filterNonLatest)), + "", + "platform", + filter, + 3) + .source() + .query()) + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1a91ae35c6595b..1fea4476d75abb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; @@ -56,6 +57,8 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; @@ -805,7 +808,214 @@ public void testQueryByDefault() { } } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + 
filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + + @Test + public void testAggregationFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof 
TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testAggregationNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + 
.filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private BoolQueryBuilder getQuery(final Criterion filterCriterion) { + return getQuery(filterCriterion, TestEntitySpecBuilder.getSpec(), true); + } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { final Filter filter = new Filter() .setOr( @@ -816,7 +1026,7 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { final SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder( operationContext.getEntityRegistry(), - TestEntitySpecBuilder.getSpec(), + entitySpec, testQueryConfig, null, QueryFilterRewriteChain.EMPTY); @@ -824,7 +1034,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { return (BoolQueryBuilder) requestHandler .getSearchRequest( - operationContext.withSearchFlags(flags -> flags.setFulltext(false)), + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), "", filter, null, @@ -834,4 +1045,33 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .source() .query(); } + + private BoolQueryBuilder getAggregationQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + final SearchRequestHandler requestHandler = + SearchRequestHandler.getBuilder( + operationContext.getEntityRegistry(), + entitySpec, + testQueryConfig, + null, + 
QueryFilterRewriteChain.EMPTY); + + return (BoolQueryBuilder) + requestHandler + .getAggregationRequest( + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), + "platform", + filter, + 10) + .source() + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 65b73b7425b743..5a4fb39bd50e96 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -430,4 +430,23 @@ public void testEmptyDescription() throws RemoteInvocationException, URISyntaxEx assertTrue(transformed.get().get("description").isNull()); assertFalse(transformed.get().get("hasDescription").asBoolean()); } + + @Test + public void testHandleRemoveFieldsWithStructuredProperties() throws IOException { + ObjectNode previousDoc = JsonNodeFactory.instance.objectNode(); + previousDoc.put("structuredProperties.prop1", "value1"); + previousDoc.put("structuredProperties.prop2", "value2"); + previousDoc.put("otherField", "value3"); + + ObjectNode newDoc = JsonNodeFactory.instance.objectNode(); + newDoc.put("structuredProperties.prop1", "updatedValue1"); + newDoc.put("otherField", "updatedValue3"); + + ObjectNode result = SearchDocumentTransformer.handleRemoveFields(newDoc, previousDoc); + + assertEquals(result.get("structuredProperties.prop1").asText(), "updatedValue1"); + assertTrue(result.has("structuredProperties.prop2")); + assertTrue(result.get("structuredProperties.prop2").isNull()); + assertEquals(result.get("otherField").asText(), "updatedValue3"); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl 
b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl new file mode 100644 index 00000000000000..af4d48debe0217 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl @@ -0,0 +1,77 @@ +namespace com.linkedin.common + +/** + * Properties about a versioned asset i.e. dataset, ML Model, etc. + */ +@Aspect = { + "name": "versionProperties" +} +record VersionProperties { + /** + * The linked Version Set entity that ties multiple versioned assets together + */ + @Searchable = { + "queryByDefault": false + } + @Relationship = { + "name": "VersionOf", + "entityTypes": [ "versionSet" ] + } + versionSet: Urn + + /** + * Label for this versioned asset, is unique within a version set + */ + @Searchable = { + "/versionTag": { + "fieldName": "version", + "queryByDefault": false + } + } + version: VersionTag + + /** + * Associated aliases for this versioned asset + */ + @Searchable = { + "/*/versionTag": { + "fieldName": "aliases", + "queryByDefault": false + } + } + aliases: array[VersionTag] = [] + + /** + * Comment documenting what this version was created for, changes, or represents + */ + comment: optional string + + /** + * Sort identifier that determines where a version lives in the order of the Version Set. + * What this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation. + */ + @Searchable = { + "queryByDefault": false, + "fieldName": "versionSortId" + } + sortId: string + + /** + * Timestamp reflecting when this asset version was created in the source system. + */ + sourceCreatedTimestamp: optional AuditStamp + + /** + * Timestamp reflecting when the metadata for this version was created in DataHub + */ + metadataCreatedTimestamp: optional AuditStamp + + /** + * Marks whether this version is currently the latest. Set by a side effect and should not be modified by API. 
+ */ + @Searchable = { + "queryByDefault": false, + "fieldType": "BOOLEAN" + } + isLatest: optional boolean +} \ No newline at end of file diff --git a/li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl similarity index 78% rename from li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl rename to metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl index f26a1b0140b793..82f2193747c5d3 100644 --- a/li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl @@ -5,4 +5,5 @@ namespace com.linkedin.common */ record VersionTag { versionTag: optional string + metadataAttribution: optional MetadataAttribution } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl new file mode 100644 index 00000000000000..edbddc29e2023f --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl @@ -0,0 +1,20 @@ +namespace com.linkedin.metadata.key + + +/** + * Key for a Version Set entity + */ +@Aspect = { + "name": "versionSetKey" +} +record VersionSetKey { + /** + * ID of the Version Set, generated from platform + asset id / name + */ + id: string + + /** + * Type of entities included in version set, limits to a single entity type between linked versioned entities + */ + entityType: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index a3a7a8cda58a8d..ab5873452641ed 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -64,4 +64,9 @@ record SearchFlags { * 
By default we include these, but custom aggregation requests don't need them. */ includeDefaultFacets: optional boolean = true + + /** + * Include only latest versions in version sets, default true + */ + filterNonLatestVersions: optional boolean = true } diff --git a/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl new file mode 100644 index 00000000000000..0e50c33aa2b7d4 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl @@ -0,0 +1,24 @@ +namespace com.linkedin.versionset + +import com.linkedin.common.CustomProperties +import com.linkedin.common.Urn + +@Aspect = { + "name": "versionSetProperties" +} +record VersionSetProperties includes CustomProperties { + /** + * The latest versioned entity linked to in this version set + */ + @Searchable = { + "queryByDefault": "false" + } + latest: Urn + + /** + * What versioning scheme is being utilized for the versioned entities sort criterion. Static once set + */ + versioningScheme: enum VersioningScheme { + ALPHANUMERIC_GENERATED_BY_DATAHUB + } +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 1556b72e4aefb1..32f9d1b98db5df 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -46,6 +46,7 @@ entities: - structuredProperties - forms - partitionsSummary + - versionProperties - name: dataHubPolicy doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc. 
category: internal @@ -365,6 +366,7 @@ entities: - structuredProperties - forms - testResults + - versionProperties - name: mlModelGroup category: core keyAspect: mlModelGroupKey @@ -494,6 +496,11 @@ entities: keyAspect: globalSettingsKey aspects: - globalSettingsInfo + - name: versionSet + category: core + keyAspect: versionSetKey + aspects: + - versionSetProperties - name: incident doc: An incident for an asset. category: core diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java index 97ca0dcabea9f3..eeb90d09204bb9 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java @@ -26,4 +26,5 @@ public class FeatureFlags { private boolean alternateMCPValidation = false; private boolean showManageStructuredProperties = false; private boolean dataProcessInstanceEntityEnabled = true; + private boolean entityVersioning = false; } diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index c029cb4648d012..69b86962442b91 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -466,6 +466,7 @@ featureFlags: showSeparateSiblings: ${SHOW_SEPARATE_SIBLINGS:false} # If turned on, all siblings will be separated with no way to get to a "combined" sibling view editableDatasetNameEnabled: ${EDITABLE_DATASET_NAME_ENABLED:false} # Enables the ability to edit the dataset name in the UI showManageStructuredProperties: ${SHOW_MANAGE_STRUCTURED_PROPERTIES:true} # If turned on, show the manage structured properties button on the govern dropdown + 
entityVersioning: ${ENTITY_VERSIONING_ENABLED:false} # Enables entity versioning APIs, validators, and side effects entityChangeEvents: enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java new file mode 100644 index 00000000000000..4d03860cccb5ca --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java @@ -0,0 +1,21 @@ +package com.linkedin.gms.factory.entity.versioning; + +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.EntityVersioningServiceImpl; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class EntityVersioningServiceFactory { + + @Bean(name = "entityVersioningService") + @Nonnull + protected EntityVersioningService createInstance(EntityService entityService) { + + return new EntityVersioningServiceImpl(entityService); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 3229f12f9021d0..8f389eccc4cf8f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.config.GraphQLConcurrencyConfiguration; import com.linkedin.metadata.connection.ConnectionService; import 
com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.SiblingGraphService; @@ -205,7 +206,8 @@ public class GraphQLEngineFactory { @Nonnull protected GraphQLEngine graphQLEngine( @Qualifier("entityClient") final EntityClient entityClient, - @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient) { + @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient, + final EntityVersioningService entityVersioningService) { GmsGraphQLEngineArgs args = new GmsGraphQLEngineArgs(); args.setEntityClient(entityClient); args.setSystemEntityClient(systemEntityClient); @@ -255,6 +257,7 @@ protected GraphQLEngine graphQLEngine( configProvider.getGraphQL().getQuery().isIntrospectionEnabled()); args.setGraphQLQueryDepthLimit(configProvider.getGraphQL().getQuery().getDepthLimit()); args.setBusinessAttributeService(businessAttributeService); + args.setEntityVersioningService(entityVersioningService); args.setConnectionService(_connectionService); args.setAssertionService(assertionService); return new GmsGraphQLEngine(args).builder().build(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 2349dbd169f1d9..7d0937663fecb0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -6,6 +6,9 @@ import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.hooks.IgnoreUnknownMutator; @@ -16,6 +19,9 @@ import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; import com.linkedin.metadata.aspect.validation.FieldPathValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; +import com.linkedin.metadata.entity.versioning.sideeffects.VersionSetSideEffect; +import com.linkedin.metadata.entity.versioning.validation.VersionPropertiesValidator; +import com.linkedin.metadata.entity.versioning.validation.VersionSetPropertiesValidator; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; import com.linkedin.metadata.structuredproperties.validation.HidePropertyValidator; import com.linkedin.metadata.structuredproperties.validation.ShowPropertyAsBadgeValidator; @@ -32,6 +38,13 @@ @Slf4j public class SpringStandardPluginConfiguration { private static final String ALL = "*"; + private static final String UPSERT = "UPSERT"; + private static final String UPDATE = "UPDATE"; + private static final String CREATE = "CREATE"; + private static final String CREATE_ENTITY = "CREATE_ENTITY"; + private static final String PATCH = "PATCH"; + private static final String DELETE = "DELETE"; + private static final String RESTATE = "RESTATE"; @Value("${metadataChangeProposal.validation.ignoreUnknown}") private boolean ignoreUnknownEnabled; @@ -189,4 +202,58 @@ public AspectPayloadValidator showPropertyAsAssetBadgeValidator() { .build())) .build()); } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public AspectPayloadValidator 
versionPropertiesValidator() { + return new VersionPropertiesValidator() + .setConfig( + AspectPluginConfig.builder() + .className(VersionPropertiesValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(VERSION_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public AspectPayloadValidator versionSetPropertiesValidator() { + return new VersionSetPropertiesValidator() + .setConfig( + AspectPluginConfig.builder() + .className(VersionSetPropertiesValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(VERSION_SET_ENTITY_NAME) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public MCPSideEffect versionSetSideEffect() { + return new VersionSetSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className(VersionSetSideEffect.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(VERSION_SET_ENTITY_NAME) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } } diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java index 68b13bd5fb4ee8..07557ece381a0a 100644 --- 
a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java +++ b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java @@ -3,11 +3,13 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +import com.linkedin.gms.factory.config.ConfigurationProvider; import io.datahubproject.openapi.config.OpenAPIAnalyticsTestConfiguration; import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.v2.generated.controller.DatahubUsageEventsApiController; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Import; import org.springframework.http.HttpStatus; @@ -22,6 +24,8 @@ public class DatahubUsageEventsImplTest extends AbstractTestNGSpringContextTests @Autowired private DatahubUsageEventsApiController analyticsController; + @MockBean private ConfigurationProvider configurationProvider; + @Test public void initTest() { assertNotNull(analyticsController); diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java index 2beb210e5bc4ff..31b35b65ea1a8c 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java @@ -4,6 +4,7 @@ import static org.testng.Assert.*; import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; import io.datahubproject.openapi.config.SpringWebConfig; @@ -38,6 +39,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Import; import org.springframework.http.HttpStatus; @@ -68,6 +70,7 @@ public void disableAssert() { @Autowired private DatasetApiController datasetApiController; @Autowired private EntityRegistry entityRegistry; @Autowired private MockMvc mockMvc; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index c756827cad56ba..01493d71643481 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -4,6 +4,9 @@ import io.datahubproject.metadata.exception.ActorAccessException; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; import java.util.Map; import 
javax.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; @@ -64,4 +67,25 @@ public static ResponseEntity> handleUnauthorizedException( public static ResponseEntity> actorAccessException(ActorAccessException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); } + + @Override + protected void logException(Exception ex, HttpServletRequest request) { + log.error("Error while resolving request: " + request.getRequestURI(), ex); + } + + @Override + protected void sendServerError( + Exception ex, HttpServletRequest request, HttpServletResponse response) throws IOException { + log.error("Error while resolving request: " + request.getRequestURI(), ex); + request.setAttribute("jakarta.servlet.error.exception", ex); + response.sendError(500); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity> handleGenericException( + Exception e, HttpServletRequest request) { + log.error("Unhandled exception occurred for request: " + request.getRequestURI(), e); + return new ResponseEntity<>( + Map.of("error", "Internal server error occurred"), HttpStatus.INTERNAL_SERVER_ERROR); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java index 622cf20af9ff57..c4b4431e77c4ef 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; 
import io.datahubproject.openapi.v3.OpenAPIV3Generator; @@ -81,13 +82,15 @@ public void addFormatters(FormatterRegistry registry) { } @Bean - public GroupedOpenApi v3OpenApiGroup(final EntityRegistry entityRegistry) { + public GroupedOpenApi v3OpenApiGroup( + final EntityRegistry entityRegistry, final ConfigurationProvider configurationProvider) { return GroupedOpenApi.builder() .group("10-openapi-v3") .displayName("DataHub v3 (OpenAPI)") .addOpenApiCustomizer( openApi -> { - OpenAPI v3OpenApi = OpenAPIV3Generator.generateOpenApiSpec(entityRegistry); + OpenAPI v3OpenApi = + OpenAPIV3Generator.generateOpenApiSpec(entityRegistry, configurationProvider); openApi.setInfo(v3OpenApi.getInfo()); openApi.setTags(Collections.emptyList()); openApi.getPaths().putAll(v3OpenApi.getPaths()); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index c6b8d579d879e0..f7764f2ddb39a1 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -1,11 +1,14 @@ package io.datahubproject.openapi.v3; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static io.datahubproject.openapi.util.ReflectionCache.toUpperFirst; import com.fasterxml.jackson.databind.JsonNode; import com.github.fge.processing.ProcessingUtil; import com.google.common.collect.ImmutableMap; import com.linkedin.data.avro.SchemaTranslator; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -64,7 +67,8 @@ public class 
OpenAPIV3Generator { private static final String ASPECTS = "Aspects"; private static final String ENTITIES = "Entities"; - public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { + public static OpenAPI generateOpenApiSpec( + EntityRegistry entityRegistry, ConfigurationProvider configurationProvider) { final Set aspectNames = entityRegistry.getAspectSpecs().keySet(); final Set entityNames = entityRegistry.getEntitySpecs().values().stream() @@ -125,22 +129,25 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { buildAspectRefResponseSchema(upperAspectName)); }); + List definedEntitySpecs = + entityRegistry.getEntitySpecs().values().stream() + .filter(entitySpec -> definitionNames.contains(entitySpec.getName())) + .sorted(Comparator.comparing(EntitySpec::getName)) + .collect(Collectors.toList()); // --> Entity components - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> aspectNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String entityName = toUpperFirst(e.getName()); - components.addSchemas( - entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); - components.addSchemas( - entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); - components.addSchemas( - "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); - components.addSchemas( - "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, - buildEntityBatchGetRequestSchema(e, aspectNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String entityName = toUpperFirst(e.getName()); + components.addSchemas( + entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); + components.addSchemas( + entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); + components.addSchemas( + "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); + components.addSchemas( + "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, 
+ buildEntityBatchGetRequestSchema(e, aspectNames)); + }); components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); // TODO: Correct handling of SystemMetadata and AuditStamp @@ -151,14 +158,12 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { // Parameters // --> Entity Parameters - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String parameterName = toUpperFirst(e.getName()) + ASPECTS; - components.addParameters( - parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String parameterName = toUpperFirst(e.getName()) + ASPECTS; + components.addParameters( + parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); + }); addExtraParameters(components); @@ -169,39 +174,56 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { paths.addPathItem("/v3/entity/scroll", buildGenericListEntitiesPath()); // --> Entity Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - paths.addPathItem( - String.format("/v3/entity/%s", e.getName().toLowerCase()), - buildListEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), - buildBatchGetEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), - buildSingleEntityPath(e)); - }); + definedEntitySpecs.forEach( + e -> { + paths.addPathItem( + String.format("/v3/entity/%s", e.getName().toLowerCase()), buildListEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), + buildBatchGetEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), + 
buildSingleEntityPath(e)); + }); // --> Aspect Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - e.getAspectSpecs().stream() - .filter(a -> definitionNames.contains(a.getName())) - .sorted(Comparator.comparing(AspectSpec::getName)) - .forEach( - a -> - paths.addPathItem( - String.format( - "/v3/entity/%s/{urn}/%s", - e.getName().toLowerCase(), a.getName().toLowerCase()), - buildSingleEntityAspectPath(e, a))); - }); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + + // --> Link & Unlink APIs + if (configurationProvider.getFeatureFlags().isEntityVersioning()) { + definedEntitySpecs.stream() + .filter(entitySpec -> VERSION_SET_ENTITY_NAME.equals(entitySpec.getName())) + .forEach( + entitySpec -> { + paths.addPathItem( + "/v3/entity/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + buildVersioningRelationshipPath()); + }); + } + return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); } @@ -1198,4 +1220,115 @@ private static PathItem buildSingleEntityAspectPath( .post(postOperation) .patch(patchOperation); } + + private static Schema buildVersionPropertiesRequestSchema() { + return new Schema<>() + .type(TYPE_OBJECT) + 
.description("Properties for creating a version relationship") + .properties( + Map.of( + "comment", + new Schema<>() + .type(TYPE_STRING) + .description("Comment about the version") + .nullable(true), + "label", + new Schema<>() + .type(TYPE_STRING) + .description("Label for the version") + .nullable(true), + "sourceCreationTimestamp", + new Schema<>() + .type(TYPE_INTEGER) + .description("Timestamp when version was created in source system") + .nullable(true), + "sourceCreator", + new Schema<>() + .type(TYPE_STRING) + .description("Creator of version in source system") + .nullable(true))); + } + + private static PathItem buildVersioningRelationshipPath() { + final PathItem result = new PathItem(); + + // Common parameters for path + final List parameters = + List.of( + new Parameter() + .in(NAME_PATH) + .name("versionSetUrn") + .description("The Version Set URN to unlink from") + .required(true) + .schema(new Schema().type(TYPE_STRING)), + new Parameter() + .in(NAME_PATH) + .name("entityUrn") + .description("The Entity URN to be unlinked") + .required(true) + .schema(new Schema().type(TYPE_STRING))); + + // Success response for DELETE + final ApiResponse successDeleteResponse = + new ApiResponse() + .description("Successfully unlinked entity from version set") + .content(new Content().addMediaType("application/json", new MediaType())); + + // DELETE operation + final Operation deleteOperation = + new Operation() + .summary("Unlink an entity from a version set") + .description("Removes the version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .responses( + new ApiResponses() + .addApiResponse("200", successDeleteResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + // Success response for POST + final ApiResponse successPostResponse = + new ApiResponse() + .description("Successfully linked entity to version set") + .content( + new 
Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + toUpperFirst(VERSION_PROPERTIES_ASPECT_NAME), + ASPECT_RESPONSE_SUFFIX))))); + + // Request body for POST + final RequestBody requestBody = + new RequestBody() + .description("Version properties for the link operation") + .required(true) + .content( + new Content() + .addMediaType( + "application/json", + new MediaType().schema(buildVersionPropertiesRequestSchema()))); + + // POST operation + final Operation postOperation = + new Operation() + .summary("Link an entity to a version set") + .description("Creates a version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .requestBody(requestBody) + .responses( + new ApiResponses() + .addApiResponse("201", successPostResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + return result.delete(deleteOperation).post(postOperation); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index af13cd3aab0510..a4583082d57c7f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -1,7 +1,9 @@ package io.datahubproject.openapi.v3.controller; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import 
com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; @@ -11,22 +13,28 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.ProposedItem; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.SortCriterion; @@ -71,9 +79,12 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.util.CollectionUtils; +import org.springframework.web.bind.annotation.DeleteMapping; +import 
org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; @@ -89,6 +100,9 @@ public class EntityController extends GenericEntitiesController< GenericAspectV3, GenericEntityV3, GenericEntityScrollResultV3> { + @Autowired private final EntityVersioningService entityVersioningService; + @Autowired private final ConfigurationProvider configurationProvider; + @Tag(name = "Generic Entities") @PostMapping(value = "/{entityName}/batchGet", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get a batch of entities") @@ -222,6 +236,111 @@ public ResponseEntity scrollEntities( entityAspectsBody.getAspects() != null)); } + @Tag(name = "EntityVersioning") + @PostMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Link an Entity to a Version Set as the latest version") + public ResponseEntity> linkLatestVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull VersionPropertiesInput versionPropertiesInput) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type 
Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "linkLatestVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + + return ResponseEntity.ok( + buildEntityList( + opContext, + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput), + false)); + } + + @Tag(name = "EntityVersioning") + @DeleteMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Unlink the latest linked version of an entity") + public ResponseEntity> unlinkVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set 
urn %s must be of type Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "unlinkVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + List rollbackResults = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + + return ResponseEntity.ok( + rollbackResults.stream() + .map(rollbackResult -> rollbackResult.getUrn().toString()) + .collect(Collectors.toList())); + } + @Override public GenericEntityScrollResultV3 buildScrollResult( @Nonnull OperationContext opContext, @@ -361,7 +480,10 @@ protected List buildEntityList( .auditStamp( withSystemMetadata ? 
ingest.getRequest().getAuditStamp() : null) .build())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + // Map merge strategy, just take latest one + .collect( + Collectors.toMap( + Map.Entry::getKey, Map.Entry::getValue, (value1, value2) -> value2)); responseList.add( GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap)); } diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java index e1568017156d9b..d8f04b60455abb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java @@ -6,6 +6,8 @@ import static org.testng.Assert.assertTrue; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import io.swagger.v3.core.util.Yaml; import io.swagger.v3.oas.models.OpenAPI; @@ -36,8 +38,10 @@ public void testOpenApiSpecBuilder() throws Exception { OpenAPIV3GeneratorTest.class .getClassLoader() .getResourceAsStream("entity-registry.yml")); + ConfigurationProvider configurationProvider = new ConfigurationProvider(); + configurationProvider.setFeatureFlags(new FeatureFlags()); - OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er); + OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er, configurationProvider); String openapiYaml = Yaml.pretty(openAPI); Files.write( Path.of(getClass().getResource("/").getPath(), "open-api.yaml"), diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java 
b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 952dc31c5ba386..e82ab50a0defeb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -33,9 +33,12 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.dataset.DatasetProfile; import com.linkedin.entity.Aspect; import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.gms.factory.entity.versioning.EntityVersioningServiceFactory; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; @@ -57,6 +60,7 @@ import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.servlet.ServletException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -81,7 +85,11 @@ @SpringBootTest(classes = {SpringWebConfig.class}) @ComponentScan(basePackages = {"io.datahubproject.openapi.v3.controller"}) -@Import({SpringWebConfig.class, EntityControllerTest.EntityControllerTestConfig.class}) +@Import({ + SpringWebConfig.class, + EntityControllerTest.EntityControllerTestConfig.class, + EntityVersioningServiceFactory.class +}) @AutoConfigureWebMvc @AutoConfigureMockMvc public class EntityControllerTest extends AbstractTestNGSpringContextTests { @@ -92,6 +100,7 @@ public class EntityControllerTest extends AbstractTestNGSpringContextTests { @Autowired private TimeseriesAspectService 
mockTimeseriesAspectService; @Autowired private EntityRegistry entityRegistry; @Autowired private OperationContext opContext; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { @@ -431,4 +440,211 @@ public TimeseriesAspectService timeseriesAspectService() { return timeseriesAspectService; } } + + @Test + public void testGetEntityBatchWithMultipleEntities() throws Exception { + List TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + // Mock entity aspect response + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))), + TEST_URNS.get(1), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + String requestBody = + String.format( + "[{\"urn\": \"%s\"}, {\"urn\": \"%s\"}]", + TEST_URNS.get(0).toString(), TEST_URNS.get(1).toString()); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].urn").value(TEST_URNS.get(0).toString())) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].urn").value(TEST_URNS.get(1).toString())); + } + + @Test(expectedExceptions = ServletException.class) + public void testGetEntityBatchWithInvalidUrn() throws Exception { + String requestBody = "[{\"urn\": \"invalid:urn\"}]"; + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + 
.andExpect(status().is4xxClientError()); + } + + @Test + public void testScrollEntitiesWithMultipleSortFields() throws Exception { + List TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray( + List.of( + new SearchEntity().setEntity(TEST_URNS.get(0)), + new SearchEntity().setEntity(TEST_URNS.get(1))))); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + nullable(String.class), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("sortCriteria", "name", "urn") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect( + MockMvcResultMatchers.jsonPath("$.entities[0].urn").value(TEST_URNS.get(0).toString())); + } + + @Test + public void testScrollEntitiesWithPitKeepAlive() throws Exception { + List TEST_URNS = + List.of(UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray(List.of(new SearchEntity().setEntity(TEST_URNS.get(0))))) + .setScrollId("test-scroll-id"); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + 
eq("10m"), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("pitKeepAlive", "10m") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$.scrollId").value("test-scroll-id")); + } + + @Test(expectedExceptions = ServletException.class) + public void testEntityVersioningFeatureFlagDisabled() throws Exception { + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + Urn VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:test-version-set"); + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(false); + + // Test linking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } + + @Test(expectedExceptions = ServletException.class) + public void testInvalidVersionSetUrn() throws Exception { + Urn TEST_URN = 
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + String INVALID_VERSION_SET_URN = "urn:li:dataset:invalid-version-set"; + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(true); + + // Test linking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 432c4a9ddcb73f..af11532ccf4ece 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -1382,6 +1382,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 45e91873de10ff..f58d83dd1e5cb7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", @@ -6139,6 +6143,12 @@ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.", "default" : true, "optional" : true + }, { + "name" : "filterNonLatestVersions", + "type" : "boolean", + "doc" : "Include only latest versions in version sets, default true", + "default" : true, + "optional" : true } ] }, { "type" : "enum", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 9061cbff188135..61c31f93987b88 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index e6be4e828c976f..75793be7331da4 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, 
{ + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 10f3218d469757..58ba2ad05dfe74 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java new file mode 100644 index 00000000000000..9e82efa913a98d --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java @@ -0,0 +1,36 @@ +package com.linkedin.metadata.entity.versioning; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; + +public interface EntityVersioningService { + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. 
Create Version Properties for specified entity. 3. Generate version + * properties with the properly set latest version Will eventually want to add in the scheme here + * as a parameter + * + * @return ingestResult -> the results of the ingested linked version + */ + List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties); + + /** + * Unlinks the latest version from a version set. Will attempt to set up the previous version as + * the new latest. This fully removes the version properties and unversions the specified entity. + * + * @param opContext operational context containing various information about the current execution + * @param currentLatest the currently linked latest versioned entity urn + * @return the deletion result + */ + List unlinkVersion(OperationContext opContext, Urn versionSet, Urn currentLatest); +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java new file mode 100644 index 00000000000000..28c320ec717201 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java @@ -0,0 +1,20 @@ +package com.linkedin.metadata.entity.versioning; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) +@AllArgsConstructor +public class VersionPropertiesInput { + private String comment; + private String version; + private Long sourceCreationTimestamp; + private String sourceCreator; +} diff --git 
a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 7e9d1701bf79a9..4cd9ec6c75b786 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -87,6 +87,14 @@ public static Filter newDisjunctiveFilter(@Nonnull Criterion... orCriterion) { .collect(Collectors.toCollection(ConjunctiveCriterionArray::new))); } + @Nonnull + public static Filter newConjunctiveFilter(@Nonnull Criterion... andCriterion) { + ConjunctiveCriterionArray orCriteria = new ConjunctiveCriterionArray(); + orCriteria.add( + new ConjunctiveCriterion().setAnd(new CriterionArray(Arrays.asList(andCriterion)))); + return new Filter().setOr(orCriteria); + } + @Nonnull public static ConjunctiveCriterion add( @Nonnull ConjunctiveCriterion conjunctiveCriterion, @Nonnull Criterion element) { diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 80a11ab98bbf4a..3c623f8df7c1bf 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -749,6 +749,14 @@ public class PoliciesConfig { EDIT_ENTITY_TAGS_PRIVILEGE, EDIT_ENTITY_GLOSSARY_TERMS_PRIVILEGE)); + // Version Set privileges + public static final ResourcePrivileges VERSION_SET_PRIVILEGES = + ResourcePrivileges.of( + "versionSet", + "Version Set", + "A logical collection of versioned entities.", + COMMON_ENTITY_PRIVILEGES); + public static final List ENTITY_RESOURCE_PRIVILEGES = ImmutableList.of( DATASET_PRIVILEGES, @@ -767,7 +775,8 @@ public class PoliciesConfig { DATA_PRODUCT_PRIVILEGES, 
ER_MODEL_RELATIONSHIP_PRIVILEGES, BUSINESS_ATTRIBUTE_PRIVILEGES, - STRUCTURED_PROPERTIES_PRIVILEGES); + STRUCTURED_PROPERTIES_PRIVILEGES, + VERSION_SET_PRIVILEGES); // Merge all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = diff --git a/smoke-test/tests/entity_versioning/__init__.py b/smoke-test/tests/entity_versioning/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/entity_versioning/test_versioning.py b/smoke-test/tests/entity_versioning/test_versioning.py new file mode 100644 index 00000000000000..c331cc5305a336 --- /dev/null +++ b/smoke-test/tests/entity_versioning/test_versioning.py @@ -0,0 +1,64 @@ +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def test_link_unlink_version(auth_session): + """Fixture to execute setup before and tear down after all tests are run""" + res_data = link_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["linkAssetVersion"] + assert ( + res_data["data"]["linkAssetVersion"] + == "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)" + ) + + res_data = unlink_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["unlinkAssetVersion"] + + +def link_version(auth_session): + json = { + "mutation": """mutation linkAssetVersion($input: LinkVersionInput!) 
{\n + linkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "version": "1233456", + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "linkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() + + +def unlink_version(auth_session): + json = { + "mutation": """mutation unlinkAssetVersion($input: UnlinkVersionInput!) {\n + unlinkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "unlinkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() diff --git a/test-models/build.gradle b/test-models/build.gradle index e8733f0525870b..89bf4ec445440d 100644 --- a/test-models/build.gradle +++ b/test-models/build.gradle @@ -18,3 +18,4 @@ idea { } sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') +spotlessJava.dependsOn generateTestDataTemplate \ No newline at end of file From b252f782c56327175a1a0bddc95b5c417db285a1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 14 Jan 2025 17:04:49 -0800 Subject: [PATCH 04/48] feat(build): use remote gradle cache (#12344) --- settings.gradle | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/settings.gradle b/settings.gradle index 77d0706549a439..437a353f210ac4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -79,6 +79,20 @@ include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' +buildCache { + def depotSecret = System.getenv('DEPOT_TOKEN'); + + remote(HttpBuildCache) { 
+ url = 'https://cache.depot.dev' + enabled = depotSecret != null + push = true + credentials { + username = '' + password = depotSecret + } + } +} + def installPreCommitHooks() { def preCommitInstalled = false try { @@ -116,7 +130,7 @@ def installPreCommitHooks() { def stderr = new StringBuilder() installHooksProcess.waitForProcessOutput(stdout, stderr) if (installHooksProcess.exitValue() != 0) { - println "Failed to install hooks: ${stderr}" + println "Failed to install hooks: ${stdout}" return } println "Hooks output: ${stdout}" From a0575329848d65eafb455a3f400e8f47bc7e9bb7 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:35:36 -0600 Subject: [PATCH 05/48] feat(docker-profiles): version mixing & docs (#12342) --- docker/build.gradle | 6 +----- docker/profiles/README.md | 28 +++++++++++++++++++++++++- docker/profiles/docker-compose.gms.yml | 16 +++++++-------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/docker/build.gradle b/docker/build.gradle index 576e47a53e6ef5..0070d814286cf0 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,7 +42,6 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], - 'quickstartDebugConsumers': [ profile: 'debug-consumers', modules: python_services_modules + backend_profile_modules + [':datahub-frontend', @@ -50,7 +49,6 @@ ext { ':metadata-jobs:mae-consumer-job'], isDebug: true ], - 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ @@ -108,9 +106,7 @@ dockerCompose { } // Common environment variables - environment.put 'DATAHUB_VERSION', config.isDebug ? 
- System.getenv("DATAHUB_VERSION") ?: "v${version}" : - "v${version}" + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}" environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' environment.put "METADATA_TESTS_ENABLED", "true" environment.put "DATAHUB_REPO", "${docker_registry}" diff --git a/docker/profiles/README.md b/docker/profiles/README.md index fb3c9e3c84a7a2..192fde3130a895 100644 --- a/docker/profiles/README.md +++ b/docker/profiles/README.md @@ -101,4 +101,30 @@ Runs everything except for the GMS. Useful for running just a local (non-docker) | debug-cassandra | | | X | | X | X | X | X | | | X | X | | | debug-consumers | X | | | | X | X | X | X | X | X | X | X | | | debug-neo4j | X | | | X | X | X | X | X | | | X | X | | -| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | \ No newline at end of file +| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | + +## Advanced Setups + +### Version Mixing + +In some cases, it might be useful to debug upgrade scenarios where there are intentional version miss-matches. It is possible +to override individual component versions. + +Note: This only works for `non-debug` profiles because of the file mounts when in `debug` which would run older containers +but still pickup the latest application jars. + +In this example we are interested in upgrading two components (the `mae-consumer` and the `mce-consumer`) to a fresh build `v0.15.1-SNAPSHOT` +while maintaining older components on `v0.14.1` (especially the `system-update` container). + +This configuration reproduces the situation where the consumers were upgraded prior to running the latest version of `system-update`. In this +scenario we expect the consumers to block their startup waiting for the successful completion of a newer `system-update`. 
+ +`DATAHUB_VERSION` - specifies the default component version of `v0.14.1` +`DATAHUB_MAE_VERSION` - specifies an override of just the `mae-consumer` to version `v0.15.1-SNAPSHOT`[1] +`DATAHUB_MCE_VERSION` - specifies an override of just the `mce-consumer` to version `v0.15.1-SNAPSHOT`[1] + +```shell + DATAHUB_MAE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_MCE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_VERSION="v0.14.1" ./gradlew quickstart +``` + +[1] Image versions were `v0.15.1-SNAPSHOT` built locally prior to running the command. diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index ada7df51e20bef..2147d6b5a0247f 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -54,7 +54,7 @@ x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env ################################# x-datahub-system-update-service: &datahub-system-update-service hostname: datahub-system-update - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-head}} command: - -u - SystemUpdate @@ -73,7 +73,7 @@ x-datahub-system-update-service: &datahub-system-update-service x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -92,7 +92,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: 
${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-head}} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -118,7 +118,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -150,7 +150,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9091:9091 env_file: @@ -163,7 +163,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} @@ -178,7 +178,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: 
${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9090:9090 env_file: @@ -193,7 +193,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} From 3905c8ee4146c93a06653dbcd690775ae36bef0f Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:36:02 -0600 Subject: [PATCH 06/48] docs(async-api): addition to known issues (#12339) --- docs/how/updating-datahub.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 68b41c907c6ad6..eb5a792216d981 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,3 +1,8 @@ +# Known Issues + +- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. + + # Updating DataHub

Watch Metadata & AI Summit sessions on-demand.

Watch Now', + '

Learn about DataHub 1.0 launching at our 5th birthday party!

Register
', backgroundColor: "#111", textColor: "#ffffff", isCloseable: false, diff --git a/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js b/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js index abede0f11735d4..ad7278a438cf81 100644 --- a/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js +++ b/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js @@ -24,7 +24,7 @@ const solutionsDropdownContent = { title: "DataHub Core", description: "Get started with the Open Source platform.", iconImage: "/img/solutions/icon-dropdown-core.png", - href: "/", + href: "/docs/quickstart", }, { title: "Cloud vs Core", From 3084147df9fe21cbab3d80eb029a14c45376925e Mon Sep 17 00:00:00 2001 From: rharisi Date: Fri, 17 Jan 2025 06:28:06 +0530 Subject: [PATCH 26/48] fix(pdl): Add Dataplatform Instance urn pdl file (#11754) Co-authored-by: John Joyce --- .../common/urn/DataPlatformInstanceUrn.java | 79 +++++++++++++++++++ .../common/DataPlatformInstanceUrn.pdl | 27 +++++++ 2 files changed, 106 insertions(+) create mode 100644 li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java create mode 100644 li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl diff --git a/li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java b/li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java new file mode 100644 index 00000000000000..dfce6dc1e51085 --- /dev/null +++ b/li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java @@ -0,0 +1,79 @@ +package com.linkedin.common.urn; + +import com.linkedin.data.template.Custom; +import com.linkedin.data.template.DirectCoercer; +import com.linkedin.data.template.TemplateOutputCastException; +import java.net.URISyntaxException; + +public final class 
DataPlatformInstanceUrn extends Urn { + + public static final String ENTITY_TYPE = "dataPlatformInstance"; + + private final DataPlatformUrn _platform; + private final String _instanceId; + + public DataPlatformInstanceUrn(DataPlatformUrn platform, String instanceId) { + super(ENTITY_TYPE, TupleKey.create(platform, instanceId)); + this._platform = platform; + this._instanceId = instanceId; + } + + public DataPlatformUrn getPlatformEntity() { + return _platform; + } + + public String getInstance() { + return _instanceId; + } + + public static DataPlatformInstanceUrn createFromString(String rawUrn) throws URISyntaxException { + return createFromUrn(Urn.createFromString(rawUrn)); + } + + public static DataPlatformInstanceUrn createFromUrn(Urn urn) throws URISyntaxException { + if (!"li".equals(urn.getNamespace())) { + throw new URISyntaxException(urn.toString(), "Urn namespace type should be 'li'."); + } else if (!ENTITY_TYPE.equals(urn.getEntityType())) { + throw new URISyntaxException( + urn.toString(), "Urn entity type should be 'dataPlatformInstance'."); + } else { + TupleKey key = urn.getEntityKey(); + if (key.size() != 2) { + throw new URISyntaxException(urn.toString(), "Invalid number of keys."); + } else { + try { + return new DataPlatformInstanceUrn( + (DataPlatformUrn) key.getAs(0, DataPlatformUrn.class), + (String) key.getAs(1, String.class)); + } catch (Exception e) { + throw new URISyntaxException(urn.toString(), "Invalid URN Parameter: '" + e.getMessage()); + } + } + } + } + + public static DataPlatformInstanceUrn deserialize(String rawUrn) throws URISyntaxException { + return createFromString(rawUrn); + } + + static { + Custom.initializeCustomClass(DataPlatformUrn.class); + Custom.initializeCustomClass(DataPlatformInstanceUrn.class); + Custom.registerCoercer( + new DirectCoercer() { + public Object coerceInput(DataPlatformInstanceUrn object) throws ClassCastException { + return object.toString(); + } + + public DataPlatformInstanceUrn 
coerceOutput(Object object) + throws TemplateOutputCastException { + try { + return DataPlatformInstanceUrn.createFromString((String) object); + } catch (URISyntaxException e) { + throw new TemplateOutputCastException("Invalid URN syntax: " + e.getMessage(), e); + } + } + }, + DataPlatformInstanceUrn.class); + } +} diff --git a/li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl b/li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl new file mode 100644 index 00000000000000..168e0ee7611d31 --- /dev/null +++ b/li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.common + +/** + * Standardized dataset identifier. + */ +@java.class = "com.linkedin.common.urn.DataPlatformInstanceUrn" +@validate.`com.linkedin.common.validator.TypedUrnValidator` = { + "accessible" : true, + "owningTeam" : "urn:li:internalTeam:datahub", + "entityType" : "dataPlatformInstance", + "constructable" : true, + "namespace" : "li", + "name" : "DataPlatformInstance", + "doc" : "Standardized data platform instance identifier.", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "fields" : [ { + "type" : "com.linkedin.common.urn.DataPlatformUrn", + "name" : "platform", + "doc" : "Standardized platform urn." + }, { + "name" : "instance", + "doc" : "Instance of the data platform (e.g. 
db instance)", + "type" : "string", + } ], + "maxLength" : 100 +} +typeref DataPlatformInstanceUrn = string From 4a1fff56c695400f1ed57caf8f1d4a4c7ea2809c Mon Sep 17 00:00:00 2001 From: Meenakshi Kamalaseshan Radha <62914384+mkamalas@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:28:43 +0530 Subject: [PATCH 27/48] feat(ui-plugin) - Allow custom userContext states to be added (#12057) --- datahub-web-react/src/app/context/CustomUserContext.tsx | 7 +++++++ datahub-web-react/src/app/context/userContext.tsx | 3 +++ 2 files changed, 10 insertions(+) create mode 100644 datahub-web-react/src/app/context/CustomUserContext.tsx diff --git a/datahub-web-react/src/app/context/CustomUserContext.tsx b/datahub-web-react/src/app/context/CustomUserContext.tsx new file mode 100644 index 00000000000000..016bbe29684ea5 --- /dev/null +++ b/datahub-web-react/src/app/context/CustomUserContext.tsx @@ -0,0 +1,7 @@ +/** + * Custom User Context State - This is a custom user context state and can be overriden in specific fork of DataHub. + * The below type can be customized with specific object properties as well if needed. + */ +export type CustomUserContextState = Record; + +export const DEFAULT_CUSTOM_STATE: CustomUserContextState = {}; diff --git a/datahub-web-react/src/app/context/userContext.tsx b/datahub-web-react/src/app/context/userContext.tsx index c9b8adafd9722f..a728e01ddc29ae 100644 --- a/datahub-web-react/src/app/context/userContext.tsx +++ b/datahub-web-react/src/app/context/userContext.tsx @@ -1,5 +1,6 @@ import React from 'react'; import { CorpUser, PlatformPrivileges } from '../../types.generated'; +import { CustomUserContextState, DEFAULT_CUSTOM_STATE } from './CustomUserContext'; /** * Local State is persisted to local storage. 
@@ -22,6 +23,7 @@ export type State = { loadedPersonalDefaultViewUrn: boolean; hasSetDefaultView: boolean; }; + customState?: CustomUserContextState; }; /** @@ -51,6 +53,7 @@ export const DEFAULT_STATE: State = { loadedPersonalDefaultViewUrn: false, hasSetDefaultView: false, }, + customState: DEFAULT_CUSTOM_STATE, }; export const DEFAULT_CONTEXT = { From fb08919f0457f45c8654c5882a6e5079bed9dac7 Mon Sep 17 00:00:00 2001 From: Deepali Jain <70557997+Deepalijain13@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:30:59 +0530 Subject: [PATCH 28/48] feat(ui): Enhancements to the user pic list selection within entities (#11803) --- .../sidebar/Ownership/EditOwnersModal.tsx | 21 +++++++++++++++++-- .../src/app/shared/OwnerLabel.tsx | 12 +++++++++-- datahub-web-react/src/graphql/search.graphql | 4 ++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx index 62b967e8f7b30d..e57666471df1a6 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx @@ -78,10 +78,26 @@ export const EditOwnersModal = ({ const renderSearchResult = (entity: Entity) => { const avatarUrl = (entity.type === EntityType.CorpUser && (entity as CorpUser).editableProperties?.pictureLink) || undefined; + const corpUserDepartmentName = + (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.departmentName) || ''; + const corpUserId = (entity.type === EntityType.CorpUser && (entity as CorpUser).username) || ''; + const corpUserTitle = (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.title) || ''; const displayName = entityRegistry.getDisplayName(entity.type, entity); + return ( - - + } + > + ); }; @@ -381,6 
+397,7 @@ export const EditOwnersModal = ({ value: owner.value.ownerUrn, label: owner.label, }))} + optionLabelProp="label" > {ownerSearchOptions} diff --git a/datahub-web-react/src/app/shared/OwnerLabel.tsx b/datahub-web-react/src/app/shared/OwnerLabel.tsx index de3c03dea2ba4a..fb670aa56d7881 100644 --- a/datahub-web-react/src/app/shared/OwnerLabel.tsx +++ b/datahub-web-react/src/app/shared/OwnerLabel.tsx @@ -20,14 +20,22 @@ type Props = { name: string; avatarUrl: string | undefined; type: EntityType; + corpUserId?: string; + corpUserTitle?: string; + corpUserDepartmentName?: string; }; -export const OwnerLabel = ({ name, avatarUrl, type }: Props) => { +export const OwnerLabel = ({ name, avatarUrl, type, corpUserId, corpUserTitle, corpUserDepartmentName }: Props) => { + const subHeader = [corpUserId, corpUserTitle, corpUserDepartmentName].filter(Boolean).join(' - '); + return ( -
{name}
+
+
{name}
+ {subHeader &&
{subHeader}
} +
); diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 72e7d347187828..de7d1befd39b08 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -433,6 +433,8 @@ fragment searchResultsWithoutSchemaField on Entity { lastName fullName email + departmentName + title } info { active @@ -442,6 +444,8 @@ fragment searchResultsWithoutSchemaField on Entity { lastName fullName email + departmentName + title } editableProperties { displayName From 825309ef5d172e38f69c382b712b5122c8d97656 Mon Sep 17 00:00:00 2001 From: BHADHRINATH U <91546378+Bhadhri03@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:38:55 +0530 Subject: [PATCH 29/48] Fix(UI): Move setUpdatedName call inside updateName promise in Dataset name edit (#12232) Co-authored-by: Raj Tekal --- .../app/entity/shared/containers/profile/header/EntityName.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx index 8976629d9ef0b1..549724bd1945d9 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx @@ -48,9 +48,9 @@ function EntityName(props: Props) { setIsEditing(false); return; } - setUpdatedName(name); updateName({ variables: { input: { name, urn } } }) .then(() => { + setUpdatedName(name); setIsEditing(false); message.success({ content: 'Name Updated', duration: 2 }); refetch(); From 99ce309b3780ca2c767c9a1e5e498b58d4e9c89e Mon Sep 17 00:00:00 2001 From: Dmitry Bryazgin <58312247+bda618@users.noreply.github.com> Date: Thu, 16 Jan 2025 20:10:03 -0500 Subject: [PATCH 30/48] feat(datahub) Remove serialVersionUID from constructor (#12150) --- .../datahub/graphql/authorization/AuthorizationUtils.java | 4 ++++ 
1 file changed, 4 insertions(+) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index c25d6af75fe76d..29d1c02dacb416 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -232,6 +232,10 @@ public static T restrictEntity(@Nonnull Object entity, Class clazz) { try { Object[] args = allFields.stream() + // New versions of graphql.codegen generate serialVersionUID + // We need to filter serialVersionUID out because serialVersionUID is + // never part of the entity type constructor + .filter(field -> !field.getName().contains("serialVersionUID")) .map( field -> { // properties are often not required but only because From 05ed277f5e3abd72d21dd836808c762b5c852ebe Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Thu, 16 Jan 2025 22:56:36 -0800 Subject: [PATCH 31/48] feat(graphql/versioning): Add versioning support to graphql; mutations return version set (#12358) Co-authored-by: RyanHolstien --- .../linkedin/datahub/graphql/Constants.java | 1 + .../datahub/graphql/GmsGraphQLEngine.java | 53 +++- .../resolvers/config/AppConfigResolver.java | 1 + .../versioning/LinkAssetVersionResolver.java | 38 ++- .../UnlinkAssetVersionResolver.java | 19 +- .../graphql/resolvers/search/SearchUtils.java | 107 +++++++ .../versioning/VersionsSearchResolver.java | 87 ++++++ .../mappers/SearchFlagsInputMapper.java | 3 + .../common/mappers/UrnToEntityMapper.java | 6 + .../graphql/types/dataset/DatasetType.java | 3 +- .../types/dataset/mappers/DatasetMapper.java | 7 + .../types/mlmodel/mappers/MLModelMapper.java | 7 + .../versioning/VersionPropertiesMapper.java | 53 ++++ .../types/versioning/VersionSetMapper.java | 47 +++ 
.../types/versioning/VersionSetType.java | 79 +++++ .../src/main/resources/app.graphql | 7 +- .../src/main/resources/entity.graphql | 65 +--- .../src/main/resources/search.graphql | 7 +- .../src/main/resources/versioning.graphql | 148 +++++++++ .../LinkAssetVersionResolverTest.java | 3 +- .../UnlinkAssetVersionResolverTest.java | 2 +- .../VersionsSearchResolverTest.java | 294 ++++++++++++++++++ datahub-web-react/src/Mocks.tsx | 1 + datahub-web-react/src/appConfigContext.tsx | 1 + datahub-web-react/src/graphql/app.graphql | 1 + datahub-web-react/src/graphql/dataset.graphql | 1 + datahub-web-react/src/graphql/mlModel.graphql | 1 + datahub-web-react/src/graphql/preview.graphql | 5 + datahub-web-react/src/graphql/search.graphql | 5 + .../src/graphql/versioning.graphql | 89 ++++++ 30 files changed, 1055 insertions(+), 86 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java create mode 100644 datahub-graphql-core/src/main/resources/versioning.graphql create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java create mode 100644 datahub-web-react/src/graphql/versioning.graphql diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index 69306862a46ef7..aec5352dec1a64 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -28,6 +28,7 @@ private Constants() {} public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql"; public static final String CONTRACTS_SCHEMA_FILE = "contract.graphql"; public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; + public static final String VERSION_SCHEMA_FILE = "versioning.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String BROWSE_PATH_V2_DELIMITER = "␟"; public static final String VERSION_STAMP_FIELD_NAME = "versionStamp"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index b15db80a8487ae..403e80a71380be 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -124,6 +124,8 @@ import com.linkedin.datahub.graphql.generated.TestResult; import com.linkedin.datahub.graphql.generated.TypeQualifier; import com.linkedin.datahub.graphql.generated.UserUsageCounts; +import com.linkedin.datahub.graphql.generated.VersionProperties; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.datahub.graphql.resolvers.MeResolver; import com.linkedin.datahub.graphql.resolvers.assertion.AssertionRunEventResolver; import com.linkedin.datahub.graphql.resolvers.assertion.DeleteAssertionResolver; @@ -324,6 +326,7 @@ import com.linkedin.datahub.graphql.resolvers.user.ListUsersResolver; import com.linkedin.datahub.graphql.resolvers.user.RemoveUserResolver; import com.linkedin.datahub.graphql.resolvers.user.UpdateUserStatusResolver; +import com.linkedin.datahub.graphql.resolvers.versioning.VersionsSearchResolver; import com.linkedin.datahub.graphql.resolvers.view.CreateViewResolver; import 
com.linkedin.datahub.graphql.resolvers.view.DeleteViewResolver; import com.linkedin.datahub.graphql.resolvers.view.ListGlobalViewsResolver; @@ -381,6 +384,7 @@ import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertyType; import com.linkedin.datahub.graphql.types.tag.TagType; import com.linkedin.datahub.graphql.types.test.TestType; +import com.linkedin.datahub.graphql.types.versioning.VersionSetType; import com.linkedin.datahub.graphql.types.view.DataHubViewType; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.SystemEntityClient; @@ -537,6 +541,7 @@ public class GmsGraphQLEngine { private final IncidentType incidentType; private final RestrictedType restrictedType; private final DataProcessInstanceType dataProcessInstanceType; + private final VersionSetType versionSetType; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; @@ -658,6 +663,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); + this.versionSetType = new VersionSetType(entityClient); this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; @@ -707,6 +713,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { entityTypeType, formType, incidentType, + versionSetType, restrictedType, businessAttributeType, dataProcessInstanceType)); @@ -809,6 +816,8 @@ public void configureRuntimeWiring(final RuntimeWiring.Builder builder) { configureConnectionResolvers(builder); configureDeprecationResolvers(builder); configureMetadataAttributionResolver(builder); + configureVersionPropertiesResolvers(builder); + configureVersionSetResolvers(builder); } private void configureOrganisationRoleResolvers(RuntimeWiring.Builder 
builder) { @@ -863,7 +872,8 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE)) .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)) .addSchema(fileBasedSchema(CONTRACTS_SCHEMA_FILE)) - .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)); + .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)) + .addSchema(fileBasedSchema(VERSION_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { List pluginSchemaFiles = plugin.getSchemaFiles(); @@ -1050,6 +1060,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("form", getResolver(formType)) .dataFetcher("view", getResolver(dataHubViewType)) .dataFetcher("structuredProperty", getResolver(structuredPropertyType)) + .dataFetcher("versionSet", getResolver(versionSetType)) .dataFetcher("listPolicies", new ListPoliciesResolver(this.entityClient)) .dataFetcher("getGrantedPrivileges", new GetGrantedPrivilegesResolver()) .dataFetcher("listUsers", new ListUsersResolver(this.entityClient)) @@ -2295,7 +2306,15 @@ private void configureTypeResolvers(final RuntimeWiring.Builder builder) { .type( "TimeSeriesAspect", typeWiring -> typeWiring.typeResolver(new TimeSeriesAspectInterfaceTypeResolver())) - .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver())); + .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver())) + .type( + "SupportsVersions", + typeWiring -> + typeWiring.typeResolver( + new EntityInterfaceTypeResolver( + loadableTypes.stream() + .map(graphType -> (EntityType) graphType) + .collect(Collectors.toList())))); } /** Configures custom type extensions leveraged within our GraphQL schema. 
*/ @@ -3322,4 +3341,34 @@ private void configureMetadataAttributionResolver(final RuntimeWiring.Builder bu entityTypes, (env) -> ((MetadataAttribution) env.getSource()).getSource()))); } + + private void configureVersionPropertiesResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "VersionProperties", + typeWiring -> + typeWiring.dataFetcher( + "versionSet", + new LoadableTypeResolver<>( + versionSetType, + (env) -> { + final VersionProperties versionProperties = env.getSource(); + return versionProperties != null + ? versionProperties.getVersionSet().getUrn() + : null; + }))); + } + + private void configureVersionSetResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "VersionSet", + typeWiring -> + typeWiring + .dataFetcher( + "latestVersion", + new EntityTypeResolver( + entityTypes, (env) -> ((VersionSet) env.getSource()).getLatestVersion())) + .dataFetcher( + "versionsSearch", + new VersionsSearchResolver(this.entityClient, this.viewService))); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index 3647eb55b2583a..8cdc13a14be87c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -189,6 +189,7 @@ public CompletableFuture get(final DataFetchingEnvironment environmen .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled()) .setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings()) .setShowManageStructuredProperties(_featureFlags.isShowManageStructuredProperties()) + .setEntityVersioningEnabled(_featureFlags.isEntityVersioning()) .build(); appConfig.setFeatureFlags(featureFlagsConfig); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java index 69e049af1e87b7..f32fd03a384005 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -12,7 +12,9 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; @@ -21,24 +23,22 @@ import io.datahubproject.metadata.context.OperationContext; import java.util.List; import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.StringUtils; /** * Currently only supports linking the latest version, but may be modified later to support inserts */ -public class LinkAssetVersionResolver implements DataFetcher> { +@Slf4j +@RequiredArgsConstructor +public class LinkAssetVersionResolver implements DataFetcher> { private final EntityVersioningService entityVersioningService; private final FeatureFlags featureFlags; - public LinkAssetVersionResolver( - EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { - this.entityVersioningService = entityVersioningService; - this.featureFlags = featureFlags; 
- } - @Override - public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); final LinkVersionInput input = bindArgument(environment.getArgument("input"), LinkVersionInput.class); @@ -75,12 +75,22 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws entityVersioningService.linkLatestVersion( opContext, versionSetUrn, entityUrn, versionPropertiesInput); - return linkResults.stream() - .filter( - ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString())) - .map(ingestResult -> ingestResult.getUrn().toString()) - .findAny() - .orElse(StringUtils.EMPTY); + String successVersionSetUrn = + linkResults.stream() + .filter( + ingestResult -> + input.getLinkedEntity().equals(ingestResult.getUrn().toString())) + .map(ingestResult -> ingestResult.getUrn().toString()) + .findAny() + .orElse(StringUtils.EMPTY); + + if (StringUtils.isEmpty(successVersionSetUrn)) { + return null; + } + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(versionSetUrn.toString()); + versionSet.setType(EntityType.VERSION_SET); + return versionSet; }, this.getClass().getSimpleName(), "get"); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java index 3d5027a0d668ac..33ab83a59c6771 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java @@ -12,14 +12,18 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import 
com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.versioning.EntityVersioningService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; +import java.util.List; import java.util.concurrent.CompletableFuture; -public class UnlinkAssetVersionResolver implements DataFetcher> { +public class UnlinkAssetVersionResolver implements DataFetcher> { private final EntityVersioningService entityVersioningService; private final FeatureFlags featureFlags; @@ -31,7 +35,7 @@ public UnlinkAssetVersionResolver( } @Override - public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { if (!featureFlags.isEntityVersioning()) { throw new IllegalAccessError( "Entity Versioning is not configured, please enable before attempting to use this feature."); @@ -58,8 +62,15 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } return GraphQLConcurrencyUtils.supplyAsync( () -> { - entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); - return true; + List results = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + if (results.isEmpty() || results.stream().allMatch(RollbackResult::isNoOp)) { + return null; + } + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(versionSetUrn.toString()); + versionSet.setType(EntityType.VERSION_SET); + return versionSet; }, this.getClass().getSimpleName(), "get"); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index a01b3aaec9c982..f105a72a1273ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -18,13 +18,18 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -33,24 +38,32 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.service.ViewService; import com.linkedin.view.DataHubViewInfo; import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import 
java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.codehaus.plexus.util.CollectionUtils; @Slf4j public class SearchUtils { private SearchUtils() {} + private static final int DEFAULT_SEARCH_COUNT = 10; + private static final int DEFAULT_SCROLL_COUNT = 10; + private static final String DEFAULT_SCROLL_KEEP_ALIVE = "5m"; + /** Entities that are searched by default in Search Across Entities */ public static final List SEARCHABLE_ENTITY_TYPES = ImmutableList.of( @@ -348,4 +361,98 @@ public static List getSortCriteria(@Nullable final SearchSortInpu return sortCriteria; } + + public static CompletableFuture searchAcrossEntities( + QueryContext inputContext, + final EntityClient _entityClient, + final ViewService _viewService, + List inputEntityTypes, + String inputQuery, + Filter baseFilter, + String viewUrn, + List sortCriteria, + com.linkedin.datahub.graphql.generated.SearchFlags inputSearchFlags, + Integer inputCount, + Integer inputStart, + String className) { + + final List entityTypes = + (inputEntityTypes == null || inputEntityTypes.isEmpty()) + ? SEARCHABLE_ENTITY_TYPES + : inputEntityTypes; + final List entityNames = + entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList()); + + // escape forward slash since it is a reserved character in Elasticsearch, default to * if + // blank/empty + final String query = + StringUtils.isNotBlank(inputQuery) ? 
ResolverUtils.escapeForwardSlash(inputQuery) : "*"; + + final Optional searchFlags = + Optional.ofNullable(inputSearchFlags) + .map((flags) -> SearchFlagsInputMapper.map(inputContext, flags)); + final OperationContext context = + inputContext.getOperationContext().withSearchFlags(searchFlags::orElse); + + final int count = Optional.ofNullable(inputCount).orElse(DEFAULT_SEARCH_COUNT); + final int start = Optional.ofNullable(inputStart).orElse(0); + + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + final OperationContext baseContext = inputContext.getOperationContext(); + final Optional maybeResolvedView = + Optional.ofNullable(viewUrn) + .map((urn) -> resolveView(baseContext, _viewService, UrnUtils.getUrn(urn))); + + final List finalEntityNames = + maybeResolvedView + .map( + (view) -> + intersectEntityTypes(entityNames, view.getDefinition().getEntityTypes())) + .orElse(entityNames); + + final Filter finalFilters = + maybeResolvedView + .map((view) -> combineFilters(baseFilter, view.getDefinition().getFilter())) + .orElse(baseFilter); + + log.debug( + "Executing search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", + finalEntityNames, + query, + finalFilters, + start, + count); + + try { + final SearchResult searchResult = + _entityClient.searchAcrossEntities( + context, + finalEntityNames, + query, + finalFilters, + start, + count, + sortCriteria, + null); + return UrnSearchResultsMapper.map(inputContext, searchResult); + } catch (Exception e) { + log.warn( + "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", + finalEntityNames, + query, + finalFilters, + start, + count); + throw new RuntimeException( + "Failed to execute search: " + + String.format( + "entity types %s, query %s, filters: %s, start: %s, count: %s", + finalEntityNames, query, finalFilters, start, count), + e); + } + }, + className, + "searchAcrossEntities"); + } } diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java new file mode 100644 index 00000000000000..915e1cf00ebc6b --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java @@ -0,0 +1,87 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Stream; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** Resolver that 
executes a searchAcrossEntities only on a version set's versioned entities */ +@Slf4j +@RequiredArgsConstructor +public class VersionsSearchResolver implements DataFetcher> { + + private static final String VERSION_SET_FIELD_NAME = "versionSet"; + + private final EntityClient _entityClient; + private final ViewService _viewService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) { + final Entity entity = environment.getSource(); + final QueryContext context = environment.getContext(); + final SearchAcrossEntitiesInput input = + bindArgument(environment.getArgument("input"), SearchAcrossEntitiesInput.class); + + final Criterion versionSetFilter = + CriterionUtils.buildCriterion(VERSION_SET_FIELD_NAME, Condition.EQUAL, entity.getUrn()); + final Filter baseFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd(new CriterionArray(versionSetFilter)))); + final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + + final List initialSortCriteria = + SearchUtils.getSortCriteria(input.getSortInput()); + final List sortCriteria = + Stream.concat( + initialSortCriteria.stream(), + Stream.of( + new SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(SortOrder.DESCENDING))) + .toList(); + + SearchFlags searchFlags = Optional.ofNullable(input.getSearchFlags()).orElse(new SearchFlags()); + searchFlags.setFilterNonLatestVersions(false); + + return SearchUtils.searchAcrossEntities( + context, + _entityClient, + _viewService, + input.getTypes(), + input.getQuery(), + SearchUtils.combineFilters(inputFilter, baseFilter), + input.getViewUrn(), + sortCriteria, + searchFlags, + input.getCount(), + input.getStart(), + this.getClass().getSimpleName()); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index 9f5025ccf303a2..0b3a445175c4c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -69,6 +69,9 @@ public com.linkedin.metadata.query.SearchFlags apply( result.setCustomHighlightingFields( new StringArray(searchFlags.getCustomHighlightingFields())); } + if (searchFlags.getFilterNonLatestVersions() != null) { + result.setFilterNonLatestVersions(searchFlags.getFilterNonLatestVersions()); + } return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index eae33e6da2e56d..b815c1b1c1dd9f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ -41,6 +41,7 @@ import com.linkedin.datahub.graphql.generated.StructuredPropertyEntity; import com.linkedin.datahub.graphql.generated.Tag; import com.linkedin.datahub.graphql.generated.Test; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -231,6 +232,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((DataProcessInstance) partialEntity).setUrn(input.toString()); ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE); } + if (input.getEntityType().equals(VERSION_SET_ENTITY_NAME)) { + partialEntity = new VersionSet(); + ((VersionSet) 
partialEntity).setUrn(input.toString()); + ((VersionSet) partialEntity).setType(EntityType.VERSION_SET); + } return partialEntity; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 6a3f9cb9b21f38..74ef4cf125cd24 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -89,7 +89,8 @@ public class DatasetType ACCESS_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, FORMS_ASPECT_NAME, - SUB_TYPES_ASPECT_NAME); + SUB_TYPES_ASPECT_NAME, + VERSION_PROPERTIES_ASPECT_NAME); private static final Set FACET_FIELDS = ImmutableSet.of("origin", "platform"); private static final String ENTITY_NAME = "dataset"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index e411014c23c89b..aa7033b180e80e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -17,6 +17,7 @@ import com.linkedin.common.Status; import com.linkedin.common.SubTypes; import com.linkedin.common.TimeStamp; +import com.linkedin.common.VersionProperties; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; @@ -48,6 +49,7 @@ import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; +import 
com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper; import com.linkedin.dataset.DatasetDeprecation; import com.linkedin.dataset.DatasetProperties; import com.linkedin.dataset.EditableDatasetProperties; @@ -183,6 +185,11 @@ public Dataset apply( SUB_TYPES_ASPECT_NAME, (dashboard, dataMap) -> dashboard.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + mappingHelper.mapToResult( + VERSION_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> + entity.setVersionProperties( + VersionPropertiesMapper.map(context, new VersionProperties(dataMap)))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), Dataset.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java index 7102fd4aed9743..11e6b5180f8c1c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java @@ -13,6 +13,7 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.VersionProperties; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; @@ -38,6 +39,7 @@ import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; +import com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper; import com.linkedin.domain.Domains; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; @@ -180,6 
+182,11 @@ public MLModel apply( FORMS_ASPECT_NAME, ((entity, dataMap) -> entity.setForms(FormsMapper.map(new Forms(dataMap), entityUrn.toString())))); + mappingHelper.mapToResult( + VERSION_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> + entity.setVersionProperties( + VersionPropertiesMapper.map(context, new VersionProperties(dataMap)))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), MLModel.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java new file mode 100644 index 00000000000000..f89ebdc9f2b043 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java @@ -0,0 +1,53 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionProperties; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.mappers.MapperUtils; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.VersionTagMapper; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionPropertiesMapper + implements ModelMapper { + public static final VersionPropertiesMapper INSTANCE = new VersionPropertiesMapper(); + + public static VersionProperties map( + @Nullable QueryContext context, + @Nonnull final com.linkedin.common.VersionProperties versionProperties) { + return INSTANCE.apply(context, versionProperties); + } + + @Override + public VersionProperties apply( + @Nullable QueryContext context, 
@Nonnull com.linkedin.common.VersionProperties input) { + final VersionProperties result = new VersionProperties(); + + result.setVersionSet( + VersionSet.builder() + .setUrn(input.getVersionSet().toString()) + .setType(EntityType.VERSION_SET) + .build()); + + result.setVersion(VersionTagMapper.map(context, input.getVersion())); + result.setAliases( + input.getAliases().stream() + .map(alias -> VersionTagMapper.map(context, alias)) + .collect(Collectors.toList())); + result.setComment(input.getComment()); + result.setIsLatest(Boolean.TRUE.equals(input.isIsLatest())); + + if (input.getMetadataCreatedTimestamp() != null) { + result.setCreated(MapperUtils.createResolvedAuditStamp(input.getMetadataCreatedTimestamp())); + } + if (input.getSourceCreatedTimestamp() != null) { + result.setCreatedInSource( + MapperUtils.createResolvedAuditStamp(input.getSourceCreatedTimestamp())); + } + + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java new file mode 100644 index 00000000000000..3a07115ece5f6e --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java @@ -0,0 +1,47 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; + +import com.linkedin.data.DataMap; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspectMap; +import 
javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionSetMapper implements ModelMapper { + + public static final VersionSetMapper INSTANCE = new VersionSetMapper(); + + public static VersionSet map( + @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { + return INSTANCE.apply(context, entityResponse); + } + + @Override + public VersionSet apply(@Nullable QueryContext context, @Nonnull EntityResponse entityResponse) { + final VersionSet result = new VersionSet(); + result.setUrn(entityResponse.getUrn().toString()); + result.setType(EntityType.VERSION_SET); + + EnvelopedAspectMap aspectMap = entityResponse.getAspects(); + MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper.mapToResult( + VERSION_SET_PROPERTIES_ASPECT_NAME, + (versionSet, dataMap) -> mapVersionSetProperties(context, versionSet, dataMap)); + + return result; + } + + private void mapVersionSetProperties( + @Nullable QueryContext context, @Nonnull VersionSet versionSet, @Nonnull DataMap dataMap) { + com.linkedin.versionset.VersionSetProperties versionProperties = + new com.linkedin.versionset.VersionSetProperties(dataMap); + versionSet.setLatestVersion(UrnToEntityMapper.map(context, versionProperties.getLatest())); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java new file mode 100644 index 00000000000000..ed2beff4530949 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java @@ -0,0 +1,79 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; 
+import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class VersionSetType + implements com.linkedin.datahub.graphql.types.EntityType { + + public static final Set ASPECTS_TO_FETCH = + ImmutableSet.of(VERSION_SET_PROPERTIES_ASPECT_NAME); + private final EntityClient _entityClient; + + @Override + public EntityType type() { + return EntityType.VERSION_SET; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return VersionSet.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List versionSetUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + final Map entities = + _entityClient.batchGetV2( + context.getOperationContext(), + VERSION_SET_ENTITY_NAME, + new HashSet<>(versionSetUrns), + ASPECTS_TO_FETCH); + + final List gmsResults = new ArrayList<>(); + for (Urn urn : versionSetUrns) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? 
null + : DataFetcherResult.newResult() + .data(VersionSetMapper.map(context, gmsResult)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Queries", e); + } + } +} diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index 28688903687235..ca7f89415f6b87 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -531,6 +531,11 @@ type FeatureFlagsConfig { If turned on, show the manage structured properties tab in the govern dropdown """ showManageStructuredProperties: Boolean! + + """ + If turned on, exposes the versioning feature by allowing users to link entities in the UI. + """ + entityVersioningEnabled: Boolean! } """ @@ -573,4 +578,4 @@ type DocPropagationSettings { The default doc propagation setting for the platform. """ docColumnPropagation: Boolean -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index b47be7ae32b2c4..51909ae72c56b0 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,16 +956,6 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean - - """ - Link the latest versioned entity to a Version Set - """ - linkAssetVersion(input: LinkVersionInput!): String - - """ - Unlink a versioned entity from a Version Set - """ - unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -1231,6 +1221,11 @@ enum EntityType { A Business Attribute """ BUSINESS_ATTRIBUTE + + """ + A set of versioned entities, representing a single source / logical entity over time + """ + VERSION_SET } """ @@ -12921,56 +12916,6 @@ input ListBusinessAttributesInput { query: String } -""" -Input for linking a versioned 
entity to a Version Set -""" -input LinkVersionInput { - """ - The target version set - """ - versionSet: String! - - """ - The target versioned entity to link - """ - linkedEntity: String! - - """ - Version Tag label for the version, should be unique within a Version Set - """ - version: String! - - """ - Optional timestamp from the source system - """ - sourceTimestamp: Long - - """ - Optional creator from the source system, will be converted to an Urn - """ - sourceCreator: String - - """ - Optional comment about the version - """ - comment: String -} - -""" -Input for unlinking a versioned entity from a Version Set -""" -input UnlinkVersionInput { - """ - The target version set - """ - versionSet: String - - """ - The target versioned entity to unlink - """ - unlinkedEntity: String -} - """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 82bfb9ee26fc42..d8f17faa3d11c2 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -172,6 +172,11 @@ input SearchFlags { Whether or not to fetch and request for structured property facets when doing a search """ includeStructuredPropertyFacets: Boolean + + """ + Determines whether to filter out any non-latest entity version if entity is part of a Version Set, default true + """ + filterNonLatestVersions: Boolean } """ @@ -1497,4 +1502,4 @@ input GroupingCriterion { """ groupingEntityType: EntityType! 
-} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/versioning.graphql b/datahub-graphql-core/src/main/resources/versioning.graphql new file mode 100644 index 00000000000000..4a63463509c84d --- /dev/null +++ b/datahub-graphql-core/src/main/resources/versioning.graphql @@ -0,0 +1,148 @@ +type VersionSet implements Entity { + """ + The primary key of the VersionSet + """ + urn: String! + + """ + The standard Entity Type + """ + type: EntityType! + + """ + Granular API for querying edges extending from this entity + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult + + """ + The latest versioned entity linked to in this version set + """ + latestVersion: Entity + + """ + Executes a search on all versioned entities linked to this version set + By default sorts by sortId in descending order + """ + versionsSearch(input: SearchAcrossEntitiesInput!): SearchResults +} + +type VersionProperties { + """ + The linked Version Set entity that ties multiple versioned assets together + """ + versionSet: VersionSet! + + """ + Label for this versioned asset, should be unique within a version set (not enforced) + """ + version: VersionTag! + + """ + Additional version identifiers for this versioned asset. + """ + aliases: [VersionTag!]! + + """ + Comment documenting what this version was created for, changes, or represents + """ + comment: String + + """ + Whether this version is currently the latest in its version set + """ + isLatest: Boolean! + + """ + Timestamp reflecting when the metadata for this version was created in DataHub + """ + created: ResolvedAuditStamp + + """ + Timestamp reflecting when the metadata for this version was created in the source system + """ + createdInSource: ResolvedAuditStamp +} + +interface SupportsVersions { + """ + Indicates that this entity is versioned and provides information about the version. 
+ """ + versionProperties: VersionProperties +} + +extend type Dataset implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type MLModel implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type Query { + """ + Fetch a Version Set by its URN + """ + versionSet(urn: String!): VersionSet +} + +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! + + """ + Version Tag label for the version, should be unique within a version set (not enforced) + """ + version: String! + + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + +extend type Mutation { + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): VersionSet + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): VersionSet +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java index 690856263fccc5..c2eb92f4d1cd4c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -56,8 +56,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - String result = resolver.get(mockEnv).get(); - assertEquals(result, TEST_ENTITY_URN); + assertEquals(resolver.get(mockEnv).get().getUrn(), TEST_VERSION_SET_URN); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java index 0000ad24a04537..e162ce96e627c6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -48,7 +48,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - assertTrue(resolver.get(mockEnv).get()); + assertEquals(resolver.get(mockEnv).get(), null); Mockito.verify(mockService) .unlinkVersion( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java new file mode 100644 index 00000000000000..3554df074df698 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java @@ -0,0 +1,294 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static 
com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.*; +import static org.mockito.ArgumentMatchers.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AndFilterInput; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.generated.SortCriterion; +import com.linkedin.datahub.graphql.generated.SortOrder; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.view.DataHubViewDefinition; +import com.linkedin.view.DataHubViewInfo; +import 
com.linkedin.view.DataHubViewType; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class VersionsSearchResolverTest { + + private static final String VERSION_SET_URN = "urn:li:versionSet:(my_version_set,dataset)"; + private static final Urn TEST_VIEW_URN = UrnUtils.getUrn("urn:li:dataHubView:test"); + private static final Urn TEST_USER_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + + private static final SearchAcrossEntitiesInput BASIC_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.DATASET), "", 0, 10, null, null, null, null, null); + + private static final SearchAcrossEntitiesInput COMPLEX_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.CHART, EntityType.DATASET), + "query", + 2, + 5, + null, + List.of( + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field1") + .setValues(List.of("1", "2")) + .build(), + FacetFilterInput.builder() + .setField("field2") + .setValues(List.of("a")) + .build())) + .build(), + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field3") + .setValues(List.of("3", "4")) + .build(), + FacetFilterInput.builder() + .setField("field4") + .setValues(List.of("b")) + .build())) + .build()), + TEST_VIEW_URN.toString(), + SearchFlags.builder().setSkipCache(true).build(), + SearchSortInput.builder() + .setSortCriteria( + List.of( + SortCriterion.builder() + .setField("sortField1") + .setSortOrder(SortOrder.DESCENDING) + .build(), + SortCriterion.builder() + .setField("sortField2") + .setSortOrder(SortOrder.ASCENDING) + .build())) + .build()); + + @Test + public void testGetSuccessBasic() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + VersionsSearchResolver resolver = new 
VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("*"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN)))))), + Mockito.eq(0), + Mockito.eq(10), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testGetSuccessComplex() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + + Filter viewFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + List.of(buildCriterion("viewField", Condition.EQUAL, "test")))))); + DataHubViewInfo viewInfo = + new DataHubViewInfo() + .setName("test") + .setType(DataHubViewType.GLOBAL) + .setCreated(new AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setLastModified(new 
AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setDefinition( + new DataHubViewDefinition() + .setEntityTypes( + new StringArray( + List.of( + Constants.DATASET_ENTITY_NAME, Constants.DASHBOARD_ENTITY_NAME))) + .setFilter(viewFilter)); + ViewService mockViewService = Mockito.mock(ViewService.class); + Mockito.when(mockViewService.getViewInfo(any(), Mockito.eq(TEST_VIEW_URN))) + .thenReturn(viewInfo); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(COMPLEX_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions() + && context.getSearchContext().getSearchFlags().isSkipCache()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("query"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "field1", Condition.EQUAL, "1", "2"), + CriterionUtils.buildCriterion( + "field2", Condition.EQUAL, "a"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test"))), + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + 
CriterionUtils.buildCriterion( + "field3", Condition.EQUAL, "3", "4"), + CriterionUtils.buildCriterion( + "field4", Condition.EQUAL, "b"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test")))))), + Mockito.eq(2), + Mockito.eq(5), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField1") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField2") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.ASCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testThrowsError() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + + Mockito.when( + mockEntityClient.searchAcrossEntities( + any(), any(), any(), any(), Mockito.anyInt(), Mockito.anyInt(), any(), any())) + .thenThrow(new RemoteInvocationException()); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + private EntityClient initMockEntityClient() throws Exception { + EntityClient client = Mockito.mock(EntityClient.class); + + Mockito.when( + 
client.searchAcrossEntities( + any(), + any(), + Mockito.anyString(), + any(), + Mockito.anyInt(), + Mockito.anyInt(), + any(), + Mockito.eq(null))) + .thenReturn( + new SearchResult() + .setEntities(new SearchEntityArray()) + .setNumEntities(0) + .setFrom(0) + .setPageSize(0) + .setMetadata(new SearchResultMetadata())); + + return client; + } +} diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 2da9e733eb4072..063b784920e234 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -645,6 +645,7 @@ export const dataset3 = { structuredProperties: null, forms: null, activeIncidents: null, + versionProperties: null, } as Dataset; export const dataset3WithSchema = { diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx index d7fef85db4b625..8ac18d0142b4e9 100644 --- a/datahub-web-react/src/appConfigContext.tsx +++ b/datahub-web-react/src/appConfigContext.tsx @@ -57,6 +57,7 @@ export const DEFAULT_APP_CONFIG = { editableDatasetNameEnabled: false, showSeparateSiblings: false, showManageStructuredProperties: false, + entityVersioningEnabled: false, }, }; diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql index 0d1999f82f77cd..c1fe50d7620a3c 100644 --- a/datahub-web-react/src/graphql/app.graphql +++ b/datahub-web-react/src/graphql/app.graphql @@ -72,6 +72,7 @@ query appConfig { editableDatasetNameEnabled showSeparateSiblings showManageStructuredProperties + entityVersioningEnabled } } } diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql index fcca919f614235..8bbeb304aae2cc 100644 --- a/datahub-web-react/src/graphql/dataset.graphql +++ b/datahub-web-react/src/graphql/dataset.graphql @@ -172,6 +172,7 @@ fragment nonSiblingDatasetFields on Dataset { forms { ...formsFields } + ...entityProfileVersionProperties } query getRecentQueries($urn: String!) 
{ diff --git a/datahub-web-react/src/graphql/mlModel.graphql b/datahub-web-react/src/graphql/mlModel.graphql index 2192888caef701..ad97c7c6f530a1 100644 --- a/datahub-web-react/src/graphql/mlModel.graphql +++ b/datahub-web-react/src/graphql/mlModel.graphql @@ -34,5 +34,6 @@ query getMLModel($urn: String!) { forms { ...formsFields } + ...entityProfileVersionProperties } } diff --git a/datahub-web-react/src/graphql/preview.graphql b/datahub-web-react/src/graphql/preview.graphql index 1bee614dd7adbe..8000f59f2bf258 100644 --- a/datahub-web-react/src/graphql/preview.graphql +++ b/datahub-web-react/src/graphql/preview.graphql @@ -346,4 +346,9 @@ fragment entityPreview on Entity { ... on Container { ...entityContainer } + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index de7d1befd39b08..9edd6754022866 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -949,6 +949,11 @@ fragment searchResultsWithoutSchemaField on Entity { ... on StructuredPropertyEntity { ...structuredPropertyFields } + ... 
on SupportsVersions { + versionProperties { + ...versionProperties + } + } } fragment searchResultFields on Entity { diff --git a/datahub-web-react/src/graphql/versioning.graphql b/datahub-web-react/src/graphql/versioning.graphql new file mode 100644 index 00000000000000..e9b6b82494b6ed --- /dev/null +++ b/datahub-web-react/src/graphql/versioning.graphql @@ -0,0 +1,89 @@ +fragment versionProperties on VersionProperties { + versionSet { + urn + type + } + isLatest + version { + versionTag + } + aliases { + versionTag + } + comment + created { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } + createdInSource { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } +} + +fragment versionsSearchResults on SearchResults { + count + total + searchResults { + entity { + urn + type + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } + } + } +} + +fragment entityProfileVersionProperties on SupportsVersions { + versionProperties { + ...versionProperties + versionSet { + urn + type + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } + } +} + +query searchAcrossVersions($versionSetUrn: String!, $input: SearchAcrossEntitiesInput!) { + versionSet(urn: $versionSetUrn) { + versionsSearch(input: $input) { + ...versionsSearchResults + } + } +} + +mutation linkAssetVersion($input: LinkVersionInput!) { + linkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} + +mutation unlinkAssetVersion($input: UnlinkVersionInput!) 
{ + unlinkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} From 4de7f61d0924dd66e86c8a31686fdf4e84a474da Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 17 Jan 2025 21:38:16 +0530 Subject: [PATCH 32/48] fix(ingest): log exception properly (#12372) --- metadata-ingestion/src/datahub/ingestion/run/pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index ee1c1608cd48c6..ef59ba7a3b58b4 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -76,8 +76,9 @@ def on_failure( failure_metadata: dict, ) -> None: logger.error( - f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}" - f" with {failure_exception} and info {failure_metadata}" + f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}", + extra={"failure_metadata": failure_metadata}, + exc_info=failure_exception, ) From 76e46b89dbcb0dc12e3524bbbfdf177d5db93473 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 17 Jan 2025 21:38:29 +0530 Subject: [PATCH 33/48] dev(ingest): move modules from isort,flake8 to ruff (#12373) --- .../airflow-plugin/build.gradle | 6 +-- .../airflow-plugin/pyproject.toml | 53 ++++++++++++++++--- .../airflow-plugin/setup.cfg | 21 -------- .../airflow-plugin/setup.py | 4 +- .../src/datahub_airflow_plugin/_config.py | 5 +- .../_datahub_ol_adapter.py | 3 +- .../src/datahub_airflow_plugin/_extractors.py | 23 ++++---- .../client/airflow_generator.py | 2 +- .../datahub_listener.py | 10 ++-- .../datahub_airflow_plugin/datahub_plugin.py | 6 +-- .../datahub_plugin_v22.py | 2 +- .../src/datahub_airflow_plugin/entities.py | 1 + .../example_dags/generic_recipe_sample_dag.py | 1 + 
.../example_dags/graph_usage_sample_dag.py | 2 +- .../example_dags/lineage_emission_dag.py | 2 +- .../datahub_airflow_plugin/hooks/datahub.py | 2 + .../lineage/_lineage_core.py | 1 - .../operators/datahub.py | 2 +- .../operators/datahub_assertion_operator.py | 2 +- .../operators/datahub_assertion_sensor.py | 2 +- .../operators/datahub_operation_operator.py | 2 +- .../operators/datahub_operation_sensor.py | 2 +- ...hub_emitter_operator_jinja_template_dag.py | 2 +- .../tests/integration/test_plugin.py | 2 +- .../airflow-plugin/tests/unit/test_airflow.py | 2 +- .../tests/unit/test_packaging.py | 2 +- .../dagster-plugin/build.gradle | 6 +-- .../dagster-plugin/pyproject.toml | 52 +++++++++++++++--- .../dagster-plugin/setup.cfg | 21 -------- .../dagster-plugin/setup.py | 5 +- .../client/dagster_generator.py | 2 + .../sensors/datahub_sensors.py | 6 ++- .../dagster-plugin/tests/unit/test_dagster.py | 4 +- .../gx-plugin/pyproject.toml | 3 -- .../prefect-plugin/pyproject.toml | 3 -- metadata-ingestion/pyproject.toml | 12 +---- .../api/entities/dataproduct/dataproduct.py | 2 +- .../datahub/ingestion/source/abs/source.py | 2 +- .../ingestion/source/dremio/dremio_api.py | 2 +- .../ingestion/source/neo4j/neo4j_source.py | 2 +- .../src/datahub/ingestion/source/s3/source.py | 2 +- .../ingestion/source/schema/json_schema.py | 2 +- .../ingestion/source/sql/clickhouse.py | 2 +- .../ingestion/source/tableau/tableau.py | 2 +- .../src/datahub/testing/mcp_diff.py | 2 +- .../src/datahub/utilities/sqllineage_patch.py | 2 +- .../integration/powerbi/test_m_parser.py | 4 +- .../tests/integration/powerbi/test_powerbi.py | 4 +- .../tests/performance/data_generation.py | 4 +- smoke-test/pyproject.toml | 2 - 50 files changed, 166 insertions(+), 144 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 68a35c0dfc417b..1bcb58e6b7c543 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ 
b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -74,16 +74,14 @@ task lint(type: Exec, dependsOn: installDev) { "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " + "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ && " + - "isort --check --diff src/ tests/ && " + - "flake8 --count --statistics src/ tests/ && " + + "ruff check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black src/ tests/ && " + - "isort src/ tests/ && " + - "flake8 src/ tests/ && " + + "ruff check --fix src/ tests/" "mypy src/ tests/ " } diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml index 648040c1951db8..7d03c2a14bf078 100644 --- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml +++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml @@ -10,11 +10,50 @@ extend-exclude = ''' ''' include = '\.pyi?$' -[tool.isort] -indent = ' ' -known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat'] -profile = 'black' -sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["datahub"] +extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +force-sort-within-sections = false +force-wrap-aliases = false +split-on-trailing-comma = false +order-by-type = true +relative-imports-order = "closest-to-furthest" +force-single-line = false +single-line-exclusions = ["typing"] +length-sort = false +from-first = false +required-imports = [] +classes = 
["typing"] -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file +[tool.ruff.lint] +select = [ + "B", + "C90", + "E", + "F", + "I", # For isort + "TID", +] +ignore = [ + # Ignore line length violations (handled by Black) + "E501", + # Ignore whitespace before ':' (matches Black) + "E203", + "E203", + # Allow usages of functools.lru_cache + "B019", + # Allow function call in argument defaults + "B008", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.flake8-tidy-imports] +# Disallow all relative imports. +ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/setup.cfg b/metadata-ingestion-modules/airflow-plugin/setup.cfg index c25256c5751b8d..abb9040ab3535a 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.cfg +++ b/metadata-ingestion-modules/airflow-plugin/setup.cfg @@ -1,24 +1,3 @@ -[flake8] -max-complexity = 15 -ignore = - # Ignore: line length issues, since black's formatter will take care of them. - E501, - # Ignore: 1 blank line required before class docstring. - D203, - # See https://stackoverflow.com/a/57074416. - W503, - # See https://github.com/psf/black/issues/315. - E203 -exclude = - .git, - venv, - .tox, - __pycache__ -per-file-ignores = - # imported but unused - __init__.py: F401 -ban-relative-imports = true - [mypy] plugins = sqlmypy, diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index d07063dbffc5c4..2fd74b37e89c05 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -73,9 +73,7 @@ def get_long_description(): *mypy_stubs, "black==22.12.0", "coverage>=5.1", - "flake8>=3.8.3", - "flake8-tidy-imports>=4.3.0", - "isort>=5.7.0", + "ruff==0.9.1", "mypy==1.10.1", # pydantic 1.8.2 is incompatible with mypy 0.910. 
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index c4964712cf9f7d..6d6ba601556788 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -1,11 +1,12 @@ from enum import Enum from typing import TYPE_CHECKING, Optional -import datahub.emitter.mce_builder as builder from airflow.configuration import conf -from datahub.configuration.common import AllowDenyPattern, ConfigModel from pydantic.fields import Field +import datahub.emitter.mce_builder as builder +from datahub.configuration.common import AllowDenyPattern, ConfigModel + if TYPE_CHECKING: from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py index 69de61aced0a59..72cdcd8813252a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py @@ -1,8 +1,9 @@ import logging -import datahub.emitter.mce_builder as builder from openlineage.client.run import Dataset as OpenLineageDataset +import datahub.emitter.mce_builder as builder + logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index 28d5775f61f542..fd01ac10f98de9 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ 
b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -3,17 +3,11 @@ import unittest.mock from typing import TYPE_CHECKING, Optional -import datahub.emitter.mce_builder as builder -from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( - get_platform_from_sqlalchemy_uri, -) -from datahub.sql_parsing.sqlglot_lineage import ( - SqlParsingResult, - create_lineage_sql_parsed_result, +from openlineage.airflow.extractors import ( + BaseExtractor, + ExtractorManager as OLExtractorManager, + TaskMetadata, ) -from openlineage.airflow.extractors import BaseExtractor -from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager -from openlineage.airflow.extractors import TaskMetadata from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor from openlineage.airflow.extractors.sql_extractor import SqlExtractor from openlineage.airflow.utils import get_operator_class, try_import_from_string @@ -23,11 +17,20 @@ SqlJobFacet, ) +import datahub.emitter.mce_builder as builder +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) +from datahub.sql_parsing.sqlglot_lineage import ( + SqlParsingResult, + create_lineage_sql_parsed_result, +) from datahub_airflow_plugin._airflow_shims import Operator from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS if TYPE_CHECKING: from airflow.models import DagRun, TaskInstance + from datahub.ingestion.graph.client import DataHubGraph logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index e9f93c0c1eab0a..c1ccdaeb0a1fbd 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ 
b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast from airflow.configuration import conf + from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( DataProcessInstance, @@ -11,7 +12,6 @@ from datahub.metadata.schema_classes import DataProcessTypeClass from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn - from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index 640991a90a1d28..9de44811f60a48 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -8,9 +8,13 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast import airflow -import datahub.emitter.mce_builder as builder from airflow.models import Variable from airflow.models.serialized_dag import SerializedDagModel +from openlineage.airflow.listener import TaskHolder +from openlineage.airflow.utils import redact_with_exclusions +from openlineage.client.serde import Serde + +import datahub.emitter.mce_builder as builder from datahub.api.entities.datajob import DataJob from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -30,10 +34,6 @@ ) from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult from datahub.telemetry import telemetry -from openlineage.airflow.listener import TaskHolder 
-from openlineage.airflow.utils import redact_with_exclusions -from openlineage.client.serde import Serde - from datahub_airflow_plugin._airflow_shims import ( HAS_AIRFLOW_DAG_LISTENER_API, HAS_AIRFLOW_DATASET_LISTENER_API, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index 137cf97f69280a..7638720db023ac 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -15,9 +15,9 @@ logger = logging.getLogger(__name__) -_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv( +_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and os.getenv( "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false" -).lower() in ("true", "1") +).lower() not in ("true", "1") if _USE_AIRFLOW_LISTENER_INTERFACE: try: @@ -32,7 +32,7 @@ with contextlib.suppress(Exception): - if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in ( + if os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() not in ( "true", "1", ): diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index db47f37bed562e..4bf050d41473e4 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -7,9 +7,9 @@ from airflow.lineage import PIPELINE_OUTLETS from airflow.models.baseoperator import BaseOperator from airflow.utils.module_loading import import_string + from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.telemetry import telemetry - from 
datahub_airflow_plugin._airflow_shims import ( MappedOperator, get_task_inlets, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py index 30b35ac6d6198b..f3fd17259c9f63 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py @@ -2,6 +2,7 @@ from typing import List, Optional import attr + import datahub.emitter.mce_builder as builder from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.dataset_urn import DatasetUrn diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py index ff8dba457066fd..ac620852c6f288 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py @@ -9,6 +9,7 @@ from airflow import DAG from airflow.operators.python import PythonOperator from airflow.utils.dates import days_ago + from datahub.configuration.config_loader import load_config_file from datahub.ingestion.run.pipeline import Pipeline diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py index d72ba67c23cd72..7951d6f7fd21ef 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py @@ -4,8 +4,8 @@ import 
pendulum from airflow.decorators import dag, task -from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter +from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py index 0d7cdb6b6e90a5..4351f40fe7e3ad 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py @@ -4,11 +4,11 @@ """ from datetime import timedelta -import datahub.emitter.mce_builder as builder from airflow import DAG from airflow.operators.bash import BashOperator from airflow.utils.dates import days_ago +import datahub.emitter.mce_builder as builder from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator default_args = { diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py index 5f4d787fb893d3..26c5026c075bd7 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py @@ -2,6 +2,7 @@ from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook + from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( @@ -11,6 +12,7 @@ if TYPE_CHECKING: from airflow.models.connection import Connection + from datahub.emitter.kafka_emitter import DatahubKafkaEmitter from datahub.emitter.rest_emitter 
import DataHubRestEmitter from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py index 638458b0efd6ab..db50c48dfaf08a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING, Dict, List from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult - from datahub_airflow_plugin._config import DatahubLineageConfig from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator from datahub_airflow_plugin.entities import ( diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py index 41d855512aa066..817db6b7480c4b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py @@ -3,9 +3,9 @@ from airflow.models import BaseOperator from airflow.utils.decorators import apply_defaults from avrogen.dict_wrapper import DictWrapper + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent - from datahub_airflow_plugin.hooks.datahub import ( DatahubGenericHook, DatahubKafkaHook, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py index 6f93c09a9e2872..3a440b0ec14e07 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.models import BaseOperator + from datahub.api.circuit_breaker import ( AssertionCircuitBreaker, AssertionCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py index 16e5d1cbe8b1f4..6a446ba1f3b55e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( AssertionCircuitBreaker, AssertionCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py index 94e105309537b6..eb5fe8168bccf8 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( 
OperationCircuitBreaker, OperationCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py index 434c60754064d0..89e20e46a0074a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( OperationCircuitBreaker, OperationCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py index c1b4aa4d7b94f4..04845e601d674d 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta from airflow import DAG + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.schema_classes import ( @@ -9,7 +10,6 @@ DatasetPropertiesClass, DatasetSnapshotClass, ) - from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator default_args = { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py 
b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 3b2c9140e4632f..d2c9821295419c 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -17,9 +17,9 @@ import requests import tenacity from airflow.models.connection import Connection + from datahub.ingestion.sink.file import write_metadata_file from datahub.testing.compare_metadata_json import assert_metadata_files_equal - from datahub_airflow_plugin._airflow_shims import ( AIRFLOW_VERSION, HAS_AIRFLOW_DAG_LISTENER_API, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py index 79620f81a437b0..1dc8e14a425dfc 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py @@ -8,12 +8,12 @@ import airflow.configuration import airflow.version -import datahub.emitter.mce_builder as builder import packaging.version import pytest from airflow.lineage import apply_lineage, prepare_lineage from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance +import datahub.emitter.mce_builder as builder from datahub_airflow_plugin import get_provider_info from datahub_airflow_plugin._airflow_shims import ( AIRFLOW_PATCHED, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py index a9c8b7ec65fa3c..a822527582c2cd 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py @@ -1,6 +1,6 @@ import setuptools -from datahub.testing.check_imports import ensure_no_indirect_model_imports +from datahub.testing.check_imports import ensure_no_indirect_model_imports from tests.utils import 
PytestConfig diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 0d57bb5bfdff70..503b3556a41bfe 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -55,16 +55,14 @@ task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ examples/ && " + - "isort --check --diff src/ tests/ examples/ && " + - "flake8 --count --statistics src/ tests/ examples/ && " + + "ruff check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/ examples/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && " + "black src/ tests/ examples/ && " + - "isort src/ tests/ examples/ && " + - "flake8 src/ tests/ examples/ && " + + "ruff check --fix src/ tests/" "mypy src/ tests/ examples/" } diff --git a/metadata-ingestion-modules/dagster-plugin/pyproject.toml b/metadata-ingestion-modules/dagster-plugin/pyproject.toml index fba81486b9f677..7d03c2a14bf078 100644 --- a/metadata-ingestion-modules/dagster-plugin/pyproject.toml +++ b/metadata-ingestion-modules/dagster-plugin/pyproject.toml @@ -10,10 +10,50 @@ extend-exclude = ''' ''' include = '\.pyi?$' -[tool.isort] -indent = ' ' -profile = 'black' -sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["datahub"] +extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +force-sort-within-sections = false +force-wrap-aliases = false +split-on-trailing-comma = false +order-by-type = true +relative-imports-order = "closest-to-furthest" +force-single-line = false +single-line-exclusions 
= ["typing"] +length-sort = false +from-first = false +required-imports = [] +classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file +[tool.ruff.lint] +select = [ + "B", + "C90", + "E", + "F", + "I", # For isort + "TID", +] +ignore = [ + # Ignore line length violations (handled by Black) + "E501", + # Ignore whitespace before ':' (matches Black) + "E203", + "E203", + # Allow usages of functools.lru_cache + "B019", + # Allow function call in argument defaults + "B008", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.flake8-tidy-imports] +# Disallow all relative imports. +ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion-modules/dagster-plugin/setup.cfg b/metadata-ingestion-modules/dagster-plugin/setup.cfg index 20a903914332aa..89b28ae45f9648 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.cfg +++ b/metadata-ingestion-modules/dagster-plugin/setup.cfg @@ -1,24 +1,3 @@ -[flake8] -max-complexity = 15 -ignore = - # Ignore: line length issues, since black's formatter will take care of them. - E501, - # Ignore: 1 blank line required before class docstring. - D203, - # See https://stackoverflow.com/a/57074416. - W503, - # See https://github.com/psf/black/issues/315. 
- E203 -exclude = - .git, - venv, - .tox, - __pycache__ -per-file-ignores = - # imported but unused - __init__.py: F401 -ban-relative-imports = true - [mypy] plugins = pydantic.mypy diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 22c15497bd8070..f2e90c14833f78 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -53,10 +53,7 @@ def get_long_description(): "dagster-snowflake-pandas >= 0.11.0", "black==22.12.0", "coverage>=5.1", - "flake8>=6.0.0", - "flake8-tidy-imports>=4.3.0", - "flake8-bugbear==23.3.12", - "isort>=5.7.0", + "ruff==0.9.1", "mypy>=1.4.0", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py index a87f490f2d947e..9a0a9a1b3a75ed 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py @@ -13,6 +13,7 @@ TableSchemaMetadataValue, ) from dagster._core.execution.stats import RunStepKeyStatsSnapshot, StepEventStatus + from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint try: @@ -23,6 +24,7 @@ from dagster._core.snap.node import OpDefSnap from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatsSnapshot + from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( DataProcessInstance, diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py 
b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py index bccdb4ac7922a5..b91a9cfa56d398 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py @@ -35,7 +35,9 @@ try: from dagster._core.definitions.sensor_definition import SensorReturnTypesUnion except ImportError: - from dagster._core.definitions.sensor_definition import RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion # type: ignore + from dagster._core.definitions.sensor_definition import ( # type: ignore + RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion, + ) from dagster._core.definitions.target import ExecutableDefinition from dagster._core.definitions.unresolved_asset_job_definition import ( @@ -43,6 +45,7 @@ ) from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData from dagster._core.execution.stats import RunStepKeyStatsSnapshot + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import SubTypesClass @@ -52,7 +55,6 @@ ) from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.error import InvalidUrnError - from datahub_dagster_plugin.client.dagster_generator import ( DATAHUB_ASSET_GROUP_NAME_CACHE, Constant, diff --git a/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py b/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py index c951b959f85d43..9a69822984bb80 100644 --- a/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py +++ b/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py @@ -22,11 +22,11 @@ RepositoryDefinition, ) from dagster._core.definitions.resource_definition import ResourceDefinition -from datahub.emitter.mcp import 
MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DatahubClientConfig from freezegun import freeze_time from utils.utils import PytestConfig, check_golden_file +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DatahubClientConfig from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig from datahub_dagster_plugin.sensors.datahub_sensors import ( DatahubSensors, diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml index fba81486b9f677..bc951452175268 100644 --- a/metadata-ingestion-modules/gx-plugin/pyproject.toml +++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml @@ -14,6 +14,3 @@ include = '\.pyi?$' indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' - -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-plugin/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml index fba81486b9f677..bc951452175268 100644 --- a/metadata-ingestion-modules/prefect-plugin/pyproject.toml +++ b/metadata-ingestion-modules/prefect-plugin/pyproject.toml @@ -14,6 +14,3 @@ include = '\.pyi?$' indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' - -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index f3a51e135082ee..745547f88bcb93 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -11,6 +11,7 @@ extend-exclude = ''' include = '\.pyi?$' target-version = ['py38', 'py39', 'py310', 'py311'] + [tool.ruff.lint.isort] combine-as-imports = true known-first-party = ["datahub"] @@ -28,16 +29,6 @@ from-first = false required-imports = [] classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] - -[tool.vulture] 
-exclude = ["src/datahub/metadata/"] -ignore_decorators = ["@click.*", "@validator", "@root_validator", "@pydantic.validator", "@pydantic.root_validator", "@pytest.fixture"] -ignore_names = ["*Source", "*Sink", "*Report"] -paths = ["src"] -sort_by_size = true - [tool.ruff] # Same as Black. line-length = 88 @@ -70,7 +61,6 @@ ignore = [ "B008", # TODO: Enable these later "B006", # Mutable args - "B007", # Unused loop control variable "B017", # Do not assert blind exception "B904", # Checks for raise statements in exception handlers that lack a from clause ] diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 2097922c151366..39de4d7f80558e 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -438,7 +438,7 @@ def _patch_ownership( for replace_index, replace_value in patches_replace.items(): list_to_manipulate[replace_index] = replace_value - for drop_index, drop_value in patches_drop.items(): + for drop_value in patches_drop.values(): list_to_manipulate.remove(drop_value) for add_value in patches_add: diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index ad2bc36cf558b5..e4f9cd0ee7e018 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -613,7 +613,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: table_data.table_path ].timestamp = table_data.timestamp - for guid, table_data in table_dict.items(): + for _, table_data in table_dict.items(): yield from self.ingest_table(table_data, path_spec) def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py 
b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index d913b7e42065d2..072995c10ebcef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -181,7 +181,7 @@ def authenticate(self, connection_args: "DremioSourceConfig") -> None: return # On-prem Dremio authentication (PAT or Basic Auth) - for retry in range(1, self._retry_count + 1): + for _ in range(1, self._retry_count + 1): try: if connection_args.authentication_method == "PAT": self.session.headers.update( diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py index 2c9107b967e4f8..8cdd4b17733e01 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py @@ -286,7 +286,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: df = self.get_neo4j_metadata( "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;" ) - for index, row in df.iterrows(): + for _, row in df.iterrows(): try: yield MetadataWorkUnit( id=row["key"], diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 989d0d734352a2..3173423f86a2ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -1124,7 +1124,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: table_data.table_path ].timestamp = table_data.timestamp - for guid, table_data in table_dict.items(): + for _, table_data in table_dict.items(): yield from self.ingest_table(table_data, path_spec) if not self.source_config.is_profiling_enabled(): diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py 
b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py index 635e894d18c7e5..a50e99393fdc27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py @@ -354,7 +354,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}" if os.path.isdir(self.config.path): - for root, dirs, files in os.walk(self.config.path, topdown=False): + for root, _, files in os.walk(self.config.path, topdown=False): for file_name in [f for f in files if f.endswith(".json")]: try: yield from self._load_one_file( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index aeb21e88d04437..2899bcc2de37b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -268,7 +268,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw): info_cache = kw.get("info_cache") all_relations = self._get_all_relation_info(connection, info_cache=info_cache) relation_names = [] - for key, relation in all_relations.items(): + for _, relation in all_relations.items(): if relation.database == schema and relation.relkind == relkind: relation_names.append(relation.relname) return relation_names diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index ee841a2a201863..8187fff559208e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -3605,7 +3605,7 @@ def emit_project_in_topological_order( parent_container_key=parent_project_key, ) - for id_, project in 
self.tableau_project_registry.items(): + for project in self.tableau_project_registry.values(): logger.debug( f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}" ) diff --git a/metadata-ingestion/src/datahub/testing/mcp_diff.py b/metadata-ingestion/src/datahub/testing/mcp_diff.py index 5e669a718e9ad3..b58afc10148edc 100644 --- a/metadata-ingestion/src/datahub/testing/mcp_diff.py +++ b/metadata-ingestion/src/datahub/testing/mcp_diff.py @@ -246,7 +246,7 @@ def pretty(self, verbose: bool = False) -> str: for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed: aspect_map = self.aspect_changes[urn] s.append(f"Urn changed, {urn}:") - for aspect_name, aspect_diffs in aspect_map.items(): + for aspect_diffs in aspect_map.values(): for i, ga in aspect_diffs.aspects_added.items(): s.append(self.report_aspect(ga, i, "added")) if verbose: diff --git a/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py b/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py index afcd02478ae687..4c237d02727f72 100644 --- a/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py +++ b/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py @@ -8,7 +8,7 @@ # Patch based on sqllineage v1.3.3 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None: # type: ignore - for i, tbl in enumerate(self.tables): + for tbl in self.tables: holder.add_read(tbl) self.union_barriers.append((len(self.columns), len(self.tables))) diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 832d00d9c54702..6f7a9c7833ba1a 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1070,7 +1070,7 @@ def test_unsupported_data_platform(): ) # type :ignore is_entry_present: bool = False - for key, entry in info_entries.items(): + for entry in 
info_entries.values(): if entry.title == "Non-Data Platform Expression": is_entry_present = True break @@ -1163,7 +1163,7 @@ def test_m_query_timeout(mock_get_lark_parser): ) # type :ignore is_entry_present: bool = False - for key, entry in warn_entries.items(): + for entry in warn_entries.values(): if entry.title == "M-Query Parsing Timeout": is_entry_present = True break diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 739be7cc8408dd..911d8a9f35139f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1438,7 +1438,7 @@ def test_powerbi_cross_workspace_reference_info_message( is_entry_present: bool = False # Printing INFO entries - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "Missing Lineage For Tile": is_entry_present = True break @@ -1563,7 +1563,7 @@ def test_powerbi_app_ingest_info_message( is_entry_present: bool = False # Printing INFO entries - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "App Ingestion Is Disabled": is_entry_present = True break diff --git a/metadata-ingestion/tests/performance/data_generation.py b/metadata-ingestion/tests/performance/data_generation.py index fcff13edf59363..266c0d9af03224 100644 --- a/metadata-ingestion/tests/performance/data_generation.py +++ b/metadata-ingestion/tests/performance/data_generation.py @@ -198,7 +198,7 @@ def generate_queries( all_tables = seed_metadata.tables + seed_metadata.views users = [f"user_{i}@xyz.com" for i in range(num_users)] - for i in range(num_selects): # Pure SELECT statements + for _ in range(num_selects): # Pure SELECT statements tables = _sample_list(all_tables, tables_per_select) all_columns = [ FieldAccess(column, table) for table in tables for column in table.columns @@ -213,7 +213,7 @@ def 
generate_queries( fields_accessed=_sample_list(all_columns, columns_per_select), ) - for i in range(num_operations): + for _ in range(num_operations): modified_table = random.choice(seed_metadata.tables) n_col = len(modified_table.columns) num_columns_modified = NormalDistribution(n_col / 2, n_col / 2) diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml index c7745d0e9a3640..aeb3c03b6466dd 100644 --- a/smoke-test/pyproject.toml +++ b/smoke-test/pyproject.toml @@ -42,5 +42,3 @@ warn_unused_configs = true disallow_incomplete_defs = false disallow_untyped_defs = false -[tool.pyright] -extraPaths = ['tests'] From 0c597d35af83e09b3ca4f310bbe2cbab0c44eda3 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Fri, 17 Jan 2025 18:15:56 +0000 Subject: [PATCH 34/48] feat(docs): Add release docs for 0.15.0 (#12374) --- .../ingest/source/builder/NameSourceStep.tsx | 2 +- docs/api/datahub-apis.md | 4 ++ docs/how/updating-datahub.md | 60 ++++++++++--------- gradle/versioning/versioning.gradle | 2 +- 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 898fbd6a6d9268..68e6c8d3436fb9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -200,7 +200,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) setVersion(event.target.value)} onBlur={(event) => handleBlur(event, setVersion)} diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index c46aacde3a0cb5..62136406e6ff66 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -12,6 +12,10 @@ DataHub has several APIs to manipulate metadata on the platform. Here's the list In general, **Python and Java SDKs** are our most recommended tools for extending and customizing the behavior of your DataHub instance. 
We don't recommend using the **OpenAPI** directly, as it's more complex and less user-friendly than the other APIs. +:::warning +About async usage of APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. +::: + ## Python and Java SDK We offer an SDK for both Python and Java that provide full functionality when it comes to CRUD operations and any complex functionality you may want to build into DataHub. We recommend using the SDKs for most use cases. Here are the examples of how to use the SDKs: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index eb5a792216d981..b887ca999c4046 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,8 +1,3 @@ -# Known Issues - -- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. - - # Updating DataHub